xref: /xnu-8796.141.3/osfmk/i386/locks_i386.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/lock.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
59  *	Date:	1985
60  *
61  *	Locking primitives implementation
62  */
63 
64 #define LOCK_PRIVATE 1
65 
66 #include <mach_ldebug.h>
67 
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <string.h>
79 
80 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
81 #include <i386/tsc.h>
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
84 #include <i386/mp.h>
85 #include <machine/atomic.h>
86 #include <sys/kdebug.h>
87 #if LCK_MTX_USE_ARCH
88 #include <i386/locks_i386_inlines.h>
89 #endif /* LCK_MTX_USE_ARCH */
90 #include <kern/cpu_number.h>
91 #include <os/hash.h>
92 #include <i386/cpuid.h>
93 
94 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
95 
96 uint64_t _Atomic lock_panic_timeout = 0xf000000;  /* 251e6 TSC ticks */
97 
98 /* Forwards */
99 
100 #if     USLOCK_DEBUG
101 /*
102  *	Perform simple lock checks.
103  */
104 int     uslock_check = 1;
105 int     max_lock_loops  = 100000000;
106 decl_simple_lock_data(extern, printf_lock);
107 decl_simple_lock_data(extern, panic_lock);
108 #endif  /* USLOCK_DEBUG */
109 
110 extern unsigned int not_in_kdp;
111 
112 #if !LCK_GRP_USE_ARG
113 #define usimple_lock_nopreempt(lck, grp) \
114 	usimple_lock_nopreempt(lck)
115 #define usimple_lock_try_nopreempt(lck, grp) \
116 	usimple_lock_try_nopreempt(lck)
117 #endif
118 static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
119 static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);
120 
121 /*
122  *	We often want to know the addresses of the callers
123  *	of the various lock routines.  However, this information
124  *	is only used for debugging and statistics.
125  */
126 typedef void    *pc_t;
127 #define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
128 #define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
129 #if     ANY_LOCK_DEBUG
130 #define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
131 #define DECL_PC(pc)     pc_t pc;
132 #else   /* ANY_LOCK_DEBUG */
133 #define DECL_PC(pc)
134 #ifdef  lint
135 /*
136  *	Eliminate lint complaints about unused local pc variables.
137  */
138 #define OBTAIN_PC(pc)   ++pc
139 #else   /* lint */
140 #define OBTAIN_PC(pc)
141 #endif  /* lint */
142 #endif  /* USLOCK_DEBUG */
143 
144 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
145 
146 #if LCK_MTX_USE_ARCH
147 KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
148 #endif /* LCK_MTX_USE_ARCH */
149 
150 /*
151  * atomic exchange API is a low level abstraction of the operations
152  * to atomically read, modify, and write a pointer.  This abstraction works
153  * for both Intel and ARMv8.1 compare and exchange atomic instructions as
154  * well as the ARM exclusive instructions.
155  *
156  * atomic_exchange_begin() - begin exchange and retrieve current value
157  * atomic_exchange_complete() - conclude an exchange
158  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
159  */
160 uint32_t
atomic_exchange_begin32(uint32_t * target,uint32_t * previous,enum memory_order ord)161 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
162 {
163 	uint32_t        val;
164 
165 	(void)ord;                      // Memory order not used
166 	val = os_atomic_load(target, relaxed);
167 	*previous = val;
168 	return val;
169 }
170 
171 boolean_t
atomic_exchange_complete32(uint32_t * target,uint32_t previous,uint32_t newval,enum memory_order ord)172 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
173 {
174 	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
175 }
176 
/*
 * Abandon an exchange started with atomic_exchange_begin32().
 * On Intel the "begin" step is only a relaxed load (see above), so
 * there is no exclusive reservation to release and this is a no-op.
 */
void
atomic_exchange_abort(void)
{
}
181 
182 boolean_t
atomic_test_and_set32(uint32_t * target,uint32_t test_mask,uint32_t set_mask,enum memory_order ord,boolean_t wait)183 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
184 {
185 	uint32_t        value, prev;
186 
187 	for (;;) {
188 		value = atomic_exchange_begin32(target, &prev, ord);
189 		if (value & test_mask) {
190 			if (wait) {
191 				cpu_pause();
192 			} else {
193 				atomic_exchange_abort();
194 			}
195 			return FALSE;
196 		}
197 		value |= set_mask;
198 		if (atomic_exchange_complete32(target, prev, value, ord)) {
199 			return TRUE;
200 		}
201 	}
202 }
203 
204 /*
205  *	Portable lock package implementation of usimple_locks.
206  */
207 
208 #if     USLOCK_DEBUG
209 #define USLDBG(stmt)    stmt
210 void            usld_lock_init(usimple_lock_t, unsigned short);
211 void            usld_lock_pre(usimple_lock_t, pc_t);
212 void            usld_lock_post(usimple_lock_t, pc_t);
213 void            usld_unlock(usimple_lock_t, pc_t);
214 void            usld_lock_try_pre(usimple_lock_t, pc_t);
215 void            usld_lock_try_post(usimple_lock_t, pc_t);
216 int             usld_lock_common_checks(usimple_lock_t, char *);
217 #else   /* USLOCK_DEBUG */
218 #define USLDBG(stmt)
219 #endif  /* USLOCK_DEBUG */
220 
221 #if LCK_MTX_USE_ARCH
222 /*
223  * Forward definitions
224  */
225 
226 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state);
227 static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
228 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
229 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
230 static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
231 static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
232 #endif /* LCK_MTX_USE_ARCH */
233 
234 
235 /*
236  *      Routine:        lck_spin_alloc_init
237  */
238 lck_spin_t *
lck_spin_alloc_init(lck_grp_t * grp,lck_attr_t * attr)239 lck_spin_alloc_init(
240 	lck_grp_t       *grp,
241 	lck_attr_t      *attr)
242 {
243 	lck_spin_t *lck;
244 
245 	lck = zalloc(KT_LCK_SPIN);
246 	lck_spin_init(lck, grp, attr);
247 	return lck;
248 }
249 
250 /*
251  *      Routine:        lck_spin_free
252  */
253 void
lck_spin_free(lck_spin_t * lck,lck_grp_t * grp)254 lck_spin_free(
255 	lck_spin_t      *lck,
256 	lck_grp_t       *grp)
257 {
258 	lck_spin_destroy(lck, grp);
259 	zfree(KT_LCK_SPIN, lck);
260 }
261 
262 /*
263  *      Routine:        lck_spin_init
264  */
265 void
lck_spin_init(lck_spin_t * lck,lck_grp_t * grp,__unused lck_attr_t * attr)266 lck_spin_init(
267 	lck_spin_t      *lck,
268 	lck_grp_t       *grp,
269 	__unused lck_attr_t     *attr)
270 {
271 	usimple_lock_init((usimple_lock_t) lck, 0);
272 	if (grp) {
273 		lck_grp_reference(grp, &grp->lck_grp_spincnt);
274 	}
275 }
276 
277 /*
278  *      Routine:        lck_spin_destroy
279  */
280 void
lck_spin_destroy(lck_spin_t * lck,lck_grp_t * grp)281 lck_spin_destroy(
282 	lck_spin_t      *lck,
283 	lck_grp_t       *grp)
284 {
285 	if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
286 		return;
287 	}
288 	lck->interlock = LCK_SPIN_TAG_DESTROYED;
289 	if (grp) {
290 		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
291 	}
292 	return;
293 }
294 
295 /*
296  *      Routine:        lck_spin_lock
297  */
298 void
lck_spin_lock_grp(lck_spin_t * lck,lck_grp_t * grp)299 lck_spin_lock_grp(
300 	lck_spin_t      *lck,
301 	lck_grp_t       *grp)
302 {
303 #pragma unused(grp)
304 	usimple_lock((usimple_lock_t) lck, grp);
305 }
306 
307 void
lck_spin_lock(lck_spin_t * lck)308 lck_spin_lock(
309 	lck_spin_t      *lck)
310 {
311 	usimple_lock((usimple_lock_t) lck, NULL);
312 }
313 
314 void
lck_spin_lock_nopreempt(lck_spin_t * lck)315 lck_spin_lock_nopreempt(
316 	lck_spin_t      *lck)
317 {
318 	usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
319 }
320 
321 void
lck_spin_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)322 lck_spin_lock_nopreempt_grp(
323 	lck_spin_t      *lck,
324 	lck_grp_t       *grp)
325 {
326 #pragma unused(grp)
327 	usimple_lock_nopreempt((usimple_lock_t) lck, grp);
328 }
329 
330 /*
331  *      Routine:        lck_spin_unlock
332  */
333 void
lck_spin_unlock(lck_spin_t * lck)334 lck_spin_unlock(
335 	lck_spin_t      *lck)
336 {
337 	usimple_unlock((usimple_lock_t) lck);
338 }
339 
340 void
lck_spin_unlock_nopreempt(lck_spin_t * lck)341 lck_spin_unlock_nopreempt(
342 	lck_spin_t      *lck)
343 {
344 	usimple_unlock_nopreempt((usimple_lock_t) lck);
345 }
346 
347 boolean_t
lck_spin_try_lock_grp(lck_spin_t * lck,lck_grp_t * grp)348 lck_spin_try_lock_grp(
349 	lck_spin_t      *lck,
350 	lck_grp_t       *grp)
351 {
352 #pragma unused(grp)
353 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
354 #if     DEVELOPMENT || DEBUG
355 	if (lrval) {
356 		pltrace(FALSE);
357 	}
358 #endif
359 	return lrval;
360 }
361 
362 
363 /*
364  *      Routine:        lck_spin_try_lock
365  */
366 boolean_t
lck_spin_try_lock(lck_spin_t * lck)367 lck_spin_try_lock(
368 	lck_spin_t      *lck)
369 {
370 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
371 #if     DEVELOPMENT || DEBUG
372 	if (lrval) {
373 		pltrace(FALSE);
374 	}
375 #endif
376 	return lrval;
377 }
378 
379 int
lck_spin_try_lock_nopreempt(lck_spin_t * lck)380 lck_spin_try_lock_nopreempt(
381 	lck_spin_t      *lck)
382 {
383 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
384 #if     DEVELOPMENT || DEBUG
385 	if (lrval) {
386 		pltrace(FALSE);
387 	}
388 #endif
389 	return lrval;
390 }
391 
392 int
lck_spin_try_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)393 lck_spin_try_lock_nopreempt_grp(
394 	lck_spin_t      *lck,
395 	lck_grp_t       *grp)
396 {
397 #pragma unused(grp)
398 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
399 #if     DEVELOPMENT || DEBUG
400 	if (lrval) {
401 		pltrace(FALSE);
402 	}
403 #endif
404 	return lrval;
405 }
406 
407 /*
408  *	Routine:	lck_spin_assert
409  */
410 void
lck_spin_assert(lck_spin_t * lock,unsigned int type)411 lck_spin_assert(lck_spin_t *lock, unsigned int type)
412 {
413 	thread_t thread, holder;
414 	uintptr_t state;
415 
416 	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
417 		panic("lck_spin_assert(): invalid arg (%u)", type);
418 	}
419 
420 	state = lock->interlock;
421 	holder = (thread_t)state;
422 	thread = current_thread();
423 	if (type == LCK_ASSERT_OWNED) {
424 		if (__improbable(holder == THREAD_NULL)) {
425 			panic("Lock not owned %p = %lx", lock, state);
426 		}
427 		if (__improbable(holder != thread)) {
428 			panic("Lock not owned by current thread %p = %lx", lock, state);
429 		}
430 	} else if (type == LCK_ASSERT_NOTOWNED) {
431 		if (__improbable(holder != THREAD_NULL)) {
432 			if (holder == thread) {
433 				panic("Lock owned by current thread %p = %lx", lock, state);
434 			}
435 		}
436 	}
437 }
438 
439 /*
440  *      Routine: kdp_lck_spin_is_acquired
441  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
442  *      Returns: TRUE if lock is acquired.
443  */
444 boolean_t
kdp_lck_spin_is_acquired(lck_spin_t * lck)445 kdp_lck_spin_is_acquired(lck_spin_t *lck)
446 {
447 	if (not_in_kdp) {
448 		panic("panic: spinlock acquired check done outside of kernel debugger");
449 	}
450 	return (lck->interlock != 0)? TRUE : FALSE;
451 }
452 
453 /*
454  *	Initialize a usimple_lock.
455  *
456  *	No change in preemption state.
457  */
void
usimple_lock_init(
	usimple_lock_t  l,
	__unused unsigned short tag)
{
	USLDBG(usld_lock_init(l, tag));         /* debug bookkeeping; compiled out unless USLOCK_DEBUG */
	hw_lock_init(&l->interlock);            /* the actual hardware lock word */
}
466 
/*
 * Spin-timeout handler for usimple locks, installed via
 * usimple_lock_spin_policy below.  If machine timeouts are suspended
 * (e.g. under a debugger) it tells the spin loop to keep waiting;
 * otherwise it records the apparent owner and panics with diagnostics.
 */
static hw_spin_timeout_status_t
usimple_lock_acquire_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	usimple_lock_t l = _lock;
	uintptr_t lowner;
	lck_spinlock_to_info_t lsti;

	if (machine_timeout_suspended()) {
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	/* lock_data holds the owning thread (see lck_spin_assert) */
	lowner = (uintptr_t)l->interlock.lock_data;
	lsti = lck_spinlock_timeout_hit(l, lowner);

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT " ;"
	    "lock owner thread=0x%lx, current_thread: %p, "
	    "lock owner active on CPU %d, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    l, HW_SPIN_TIMEOUT_ARG(to, st),
	    lowner, current_thread(), lsti->owner_cpu,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
489 
/*
 * Spin policy for usimple locks: spin for up to LockTimeOutTSC,
 * then invoke usimple_lock_acquire_timeout_panic().
 */
static const struct hw_spin_policy usimple_lock_spin_policy = {
	.hwsp_name              = "usimple_lock_t",
	.hwsp_timeout           = &LockTimeOutTSC,
	.hwsp_op_timeout        = usimple_lock_acquire_timeout_panic,
};
495 
496 /*
497  *	Acquire a usimple_lock.
498  *
499  *	Returns with preemption disabled.  Note
500  *	that the hw_lock routines are responsible for
501  *	maintaining preemption state.
502  */
/* parenthesized name prevents expansion of any usimple_lock() macro */
void
(usimple_lock)(
	usimple_lock_t  l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	DECL_PC(pc);

	OBTAIN_PC(pc);                  /* caller PC; only kept when lock debugging is on */
	USLDBG(usld_lock_pre(l, pc));   /* sanity/recursion checks before spinning */

	/* spin with the timeout-then-panic policy defined above */
	(void)hw_lock_to(&l->interlock, &usimple_lock_spin_policy, grp);
#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));  /* record owner/cpu/pc after acquisition */
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
523 
524 /*
525  *	Acquire a usimple_lock_nopreempt
526  *
527  *	Called and returns with preemption disabled.  Note
528  *	that the hw_lock routines are responsible for
529  *	maintaining preemption state.
530  */
static void
usimple_lock_nopreempt(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);                  /* caller PC; only kept when lock debugging is on */
	USLDBG(usld_lock_pre(l, pc));   /* sanity/recursion checks before spinning */

	/* same spin policy as usimple_lock(), but leaves preemption count alone */
	(void)hw_lock_to_nopreempt(&l->interlock, &usimple_lock_spin_policy, grp);

#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));  /* record owner/cpu/pc after acquisition */
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
552 
553 
554 /*
555  *	Release a usimple_lock.
556  *
557  *	Returns with preemption enabled.  Note
558  *	that the hw_lock routines are responsible for
559  *	maintaining preemption state.
560  */
void
usimple_unlock(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));     /* ownership checks before the actual release */
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock(&l->interlock);  /* releases the lock; re-enables preemption */
}
574 
575 /*
576  *	Release a usimple_unlock_nopreempt.
577  *
578  *	Called and returns with preemption enabled.  Note
579  *	that the hw_lock routines are responsible for
580  *	maintaining preemption state.
581  */
void
usimple_unlock_nopreempt(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));     /* ownership checks before the actual release */
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	/* release without touching the preemption count */
	hw_lock_unlock_nopreempt(&l->interlock);
}
595 
596 /*
597  *	Conditionally acquire a usimple_lock.
598  *
599  *	On success, returns with preemption disabled.
600  *	On failure, returns with preemption in the same state
601  *	as when first invoked.  Note that the hw_lock routines
602  *	are responsible for maintaining preemption state.
603  *
604  *	XXX No stats are gathered on a miss; I preserved this
605  *	behavior from the original assembly-language code, but
606  *	doesn't it make sense to log misses?  XXX
607  */
unsigned int
usimple_lock_try(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	unsigned int    success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		/* debug ownership state is recorded only on success */
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}
626 
/*
 *	Assert the ownership state of a usimple_lock; delegated to the
 *	hw_lock layer.
 */
void
usimple_lock_assert(usimple_lock_t l, unsigned int type)
{
	hw_lock_assert(&l->interlock, type);
}
632 
633 /*
634  *	Conditionally acquire a usimple_lock.
635  *
636  *	Called and returns with preemption disabled.  Note
637  *	that the hw_lock routines are responsible for
638  *	maintaining preemption state.
639  *
640  *	XXX No stats are gathered on a miss; I preserved this
641  *	behavior from the original assembly-language code, but
642  *	doesn't it make sense to log misses?  XXX
643  */
static unsigned int
usimple_lock_try_nopreempt(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	unsigned int    success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		/* debug ownership state is recorded only on success */
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}
662 
663 /*
664  * Acquire a usimple_lock while polling for pending cpu signals
665  * and spinning on a lock.
666  *
667  */
668 unsigned
669 int
670 (usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
671     uint64_t deadline
672     LCK_GRP_ARG(lck_grp_t *grp))
673 {
674 	boolean_t istate = ml_get_interrupts_enabled();
675 
676 	if (deadline < mach_absolute_time()) {
677 		return 0;
678 	}
679 
680 	while (!simple_lock_try(l, grp)) {
681 		if (!istate) {
682 			cpu_signal_handler(NULL);
683 		}
684 
685 		if (deadline < mach_absolute_time()) {
686 			return 0;
687 		}
688 
689 		cpu_pause();
690 	}
691 
692 	return 1;
693 }
694 
695 void
696 (usimple_lock_try_lock_loop)(usimple_lock_t l
697     LCK_GRP_ARG(lck_grp_t *grp))
698 {
699 	/* When the lock is not contended, grab the lock and go. */
700 	if (!simple_lock_try(l, grp)) {
701 		usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
702 	}
703 }
704 
705 unsigned
706 int
707 (usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
708     uint64_t duration
709     LCK_GRP_ARG(lck_grp_t *grp))
710 {
711 	uint64_t deadline;
712 	uint64_t base_at;
713 	uint64_t duration_at;
714 
715 	/* Fast track for uncontended locks */
716 	if (simple_lock_try(l, grp)) {
717 		return 1;
718 	}
719 
720 	base_at = mach_absolute_time();
721 
722 	nanoseconds_to_absolutetime(duration, &duration_at);
723 	deadline = base_at + duration_at;
724 	if (deadline < base_at) {
725 		/* deadline has overflowed, make it saturate */
726 		deadline = ULLONG_MAX;
727 	}
728 
729 	return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
730 }
731 
732 #if     USLOCK_DEBUG
733 /*
734  *	States of a usimple_lock.  The default when initializing
735  *	a usimple_lock is setting it up for debug checking.
736  */
737 #define USLOCK_CHECKED          0x0001          /* lock is being checked */
738 #define USLOCK_TAKEN            0x0002          /* lock has been taken */
739 #define USLOCK_INIT             0xBAA0          /* lock has been initialized */
740 #define USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
741 #define USLOCK_CHECKING(l)      (uslock_check &&                        \
742 	                         ((l)->debug.state & USLOCK_CHECKED))
743 
744 /*
745  *	Initialize the debugging information contained
746  *	in a usimple_lock.
747  */
748 void
usld_lock_init(usimple_lock_t l,__unused unsigned short tag)749 usld_lock_init(
750 	usimple_lock_t  l,
751 	__unused unsigned short tag)
752 {
753 	if (l == USIMPLE_LOCK_NULL) {
754 		panic("lock initialization:  null lock pointer");
755 	}
756 	l->lock_type = USLOCK_TAG;
757 	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
758 	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
759 	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
760 	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
761 	l->debug.duration[0] = l->debug.duration[1] = 0;
762 	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
763 	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
764 	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
765 }
766 
767 
/*
 *	Sanity checks applied on every usimple_lock operation, even for
 *	locks without USLOCK_CHECKED: non-NULL pointer, correct type tag,
 *	and initialized debug state — each violation panics.
 *	Returns non-zero when the caller should additionally perform the
 *	full USLOCK_CHECKING checks.
 */
int
usld_lock_common_checks(
	usimple_lock_t  l,
	char            *caller)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("%s:  null lock pointer", caller);
	}
	if (l->lock_type != USLOCK_TAG) {
		panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	}
	if (!(l->debug.state & USLOCK_INIT)) {
		panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	}
	return USLOCK_CHECKING(l);
}
788 
789 
790 /*
791  *	Debug checks on a usimple_lock just before attempting
792  *	to acquire it.
793  */
794 /* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t  l,
	pc_t            pc)
{
	char    caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

/*
 *	Note that we have a weird case where we are getting a lock when we are
 *	in the process of putting the system to sleep. We are running with no
 *	current threads, therefore we can't tell if we are trying to retake a lock
 *	we have or someone on the other processor has it.  Therefore we just
 *	ignore this test if the locking thread is 0.
 */

	/* catch self-deadlock: re-acquiring a lock this thread already holds */
	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		    caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		    l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	/* NOTE(review): this disable/enable pair has no lasting effect —
	 * looks like a historical leftover; confirm before removing. */
	mp_disable_preemption();
	mp_enable_preemption();
}
826 
827 
828 /*
829  *	Debug checks on a usimple_lock just after acquiring it.
830  *
831  *	Pre-emption has been disabled at this point,
832  *	so we are safe in using cpu_number.
833  */
void
usld_lock_post(
	usimple_lock_t  l,
	pc_t            pc)
{
	unsigned int mycpu;
	char    caller[] = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/* aside from TAKEN, the state must still read as fully initialized */
	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s:  lock %p became uninitialized",
		    caller, l);
	}
	/* we just acquired it, so nobody else may have marked it TAKEN */
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s:  lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = (unsigned int)cpu_number();
	assert(mycpu <= UCHAR_MAX);     /* lock_cpu is stored as unsigned char */

	/* record the new owner, acquisition PC and CPU */
	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}
864 
865 
866 /*
867  *	Debug checks on a usimple_lock just before
868  *	releasing it.  Note that the caller has not
869  *	yet released the hardware lock.
870  *
871  *	Preemption is still disabled, so there's
872  *	no problem using cpu_number.
873  */
874 void
usld_unlock(usimple_lock_t l,pc_t pc)875 usld_unlock(
876 	usimple_lock_t  l,
877 	pc_t            pc)
878 {
879 	unsigned int mycpu;
880 	char    caller[] = "usimple_unlock";
881 
882 
883 	if (!usld_lock_common_checks(l, caller)) {
884 		return;
885 	}
886 
887 	mycpu = cpu_number();
888 	assert(mycpu <= UCHAR_MAX);
889 
890 	if (!(l->debug.state & USLOCK_TAKEN)) {
891 		panic("%s:  lock 0x%p hasn't been taken",
892 		    caller, l);
893 	}
894 	if (l->debug.lock_thread != (void *) current_thread()) {
895 		panic("%s:  unlocking lock 0x%p, owned by thread %p",
896 		    caller, l, l->debug.lock_thread);
897 	}
898 	if (l->debug.lock_cpu != mycpu) {
899 		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
900 		    caller, l, mycpu);
901 		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
902 		panic("%s", caller);
903 	}
904 
905 	l->debug.unlock_thread = l->debug.lock_thread;
906 	l->debug.lock_thread = INVALID_PC;
907 	l->debug.state &= ~USLOCK_TAKEN;
908 	l->debug.unlock_pc = pc;
909 	l->debug.unlock_cpu = (unsigned char)mycpu;
910 }
911 
912 
913 /*
914  *	Debug checks on a usimple_lock just before
915  *	attempting to acquire it.
916  *
917  *	Preemption isn't guaranteed to be disabled.
918  */
void
usld_lock_try_pre(
	usimple_lock_t  l,
	__unused pc_t   pc)
{
	char    caller[] = "usimple_lock_try";

	/* only the common pointer/tag/init checks apply before a try */
	if (!usld_lock_common_checks(l, caller)) {
		return;
	}
}
930 
931 
932 /*
933  *	Debug checks on a usimple_lock just after
934  *	successfully attempting to acquire it.
935  *
936  *	Preemption has been disabled by the
937  *	lock acquisition attempt, so it's safe
938  *	to use cpu_number.
939  */
void
usld_lock_try_post(
	usimple_lock_t  l,
	pc_t            pc)
{
	unsigned int mycpu;
	char    caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/* aside from TAKEN, the state must still read as fully initialized */
	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s:  lock 0x%p became uninitialized",
		    caller, l);
	}
	/* we just acquired it, so nobody else may have marked it TAKEN */
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s:  lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = cpu_number();
	assert(mycpu <= UCHAR_MAX);     /* lock_cpu is stored as unsigned char */

	/* record the new owner, acquisition PC and CPU */
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}
969 #endif  /* USLOCK_DEBUG */
970 #if LCK_MTX_USE_ARCH
971 
972 /*
973  * Slow path routines for lck_mtx locking and unlocking functions.
974  *
975  * These functions were previously implemented in x86 assembly,
976  * and some optimizations are in place in this c code to obtain a compiled code
977  * as performant and compact as the assembly version.
978  *
979  * To avoid to inline these functions on the fast path, all functions directly called by
980  * the fast paths have the __attribute__((noinline)) specified. Also they are all implemented
981  * in such a way the fast path can tail call into them. In this way the return address
982  * does not need to be pushed on the caller stack and stack optimization can happen on the caller.
983  *
984  * Slow path code is structured in such a way there are no calls to functions that will return
985  * on the context of the caller function, i.e. all functions called are or tail call functions
986  * or inline functions. The number of arguments of the tail call functions are less then six,
987  * so that they can be passed over registers and do not need to be pushed on stack.
988  * This allows the compiler to not create a stack frame for the functions.
989  *
990  * __improbable and __probable are used to compile the slow path code in such a way
991  * the fast path case will be on a sequence of instructions with as less jumps as possible,
992  * to make this case the most optimized even if falling through the slow path.
993  */
994 
995 /*
996  * Intel lock invariants:
997  *
998  * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
999  *
1000  * The lock owner is promoted to the max priority of all its waiters only if it
1001  * was a lower priority when it acquired or was an owner when a waiter waited.
1002  * Max priority is capped at MAXPRI_PROMOTE.
1003  *
1004  * The last waiter will not be promoted as it is woken up, but the last
1005  * lock owner may not have been the last thread to have been woken up depending on the
1006  * luck of the draw.  Therefore a last-owner may still have the promoted-on-wakeup
1007  * flag set.
1008  *
1009  * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
1010  *       priority from dropping priority in the future without having to take thread lock
1011  *       on acquire.
1012  */
1013 
1014 /*
1015  *      Routine:        lck_mtx_alloc_init
1016  */
1017 lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t * grp,lck_attr_t * attr)1018 lck_mtx_alloc_init(
1019 	lck_grp_t       *grp,
1020 	lck_attr_t      *attr)
1021 {
1022 	lck_mtx_t *lck;
1023 
1024 	lck = zalloc(KT_LCK_MTX);
1025 	lck_mtx_init(lck, grp, attr);
1026 	return lck;
1027 }
1028 
1029 /*
1030  *      Routine:        lck_mtx_free
1031  */
void
lck_mtx_free(
	lck_mtx_t       *lck,
	lck_grp_t       *grp)
{
	/* tear down the lock state first, then return the memory to the zone */
	lck_mtx_destroy(lck, grp);
	zfree(KT_LCK_MTX, lck);
}
1040 
1041 /*
1042  *      Routine:        lck_mtx_init
1043  */
1044 void
lck_mtx_init(lck_mtx_t * lck,lck_grp_t * grp,lck_attr_t * attr)1045 lck_mtx_init(
1046 	lck_mtx_t       *lck,
1047 	lck_grp_t       *grp,
1048 	lck_attr_t      *attr)
1049 {
1050 	if (attr == LCK_ATTR_NULL) {
1051 		attr = &lck_attr_default;
1052 	}
1053 
1054 	*lck = (lck_mtx_t){
1055 		.lck_mtx_grp = grp->lck_grp_attr_id,
1056 	};
1057 
1058 	if (__improbable((attr->lck_attr_val & LCK_ATTR_DEBUG) ||
1059 	    (grp->lck_grp_attr_id & LCK_GRP_ATTR_DEBUG))) {
1060 		lck->lck_mtx_profile = 1;
1061 	}
1062 
1063 	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
1064 }
1065 
1066 /*
1067  *      Routine:        lck_mtx_destroy
1068  */
void
lck_mtx_destroy(
	lck_mtx_t       *lck,
	lck_grp_t       *grp)
{
	/* destroying twice is tolerated and turned into a no-op */
	if (lck->lck_mtx_state == LCK_MTX_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/* release-store the tag so earlier lock accesses are visible first */
	ordered_store_mtx_state_release(lck, LCK_MTX_TAG_DESTROYED);
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
1083 
1084 
1085 #if DEVELOPMENT | DEBUG
__attribute__((noinline))
void
lck_mtx_owner_check_panic(
	lck_mtx_t       *lock)
{
	/*
	 * Report an unlock attempted by a thread that is not the recorded
	 * owner.  The owner ctid is translated to a thread_t without holding
	 * the interlock (hence "unsafe"), purely to enrich the panic string.
	 */
	thread_t owner = ctid_get_thread_unsafe(lock->lck_mtx_owner);
	panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
1094 #endif
1095 
1096 /*
1097  * Routine:     lck_mtx_unlock_slow
1098  *
1099  * Unlocks a mutex held by current thread.
1100  *
1101  * It will wake up waiters if necessary.
1102  *
1103  * Interlock can be held.
1104  */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
	lck_mtx_t       *lock)
{
	thread_t        thread;
	uint32_t        state;

	state = ordered_load_mtx_state(lock);

	thread = current_thread();

#if DEVELOPMENT | DEBUG
	/* only the recorded owner may unlock; anything else is a fatal bug */
	if (__improbable(lock->lck_mtx_owner != thread->ctid)) {
		lck_mtx_owner_check_panic(lock);
	}
#endif

	/* check if it is held as a spinlock */
	if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
		goto unlock;
	}

	/*
	 * Held as a full mutex: acquire the interlock and drop the
	 * mutex-locked bit in the same atomic update.
	 */
	lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
	/* preemption disabled, interlock held and mutex not held */

	/* clear owner */
	ordered_store_mtx_owner(lock, 0);

	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if     MACH_LDEBUG
		if (thread) {
			thread->mutex_count--;
		}
#endif
		/* tail call: wakes one waiter, releases interlock and preemption */
		return lck_mtx_unlock_wakeup_tail(lock, state);
	}

	/* release interlock, promotion and clear spin flag */
	state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
	ordered_store_mtx_state_release(lock, state);           /* since I own the interlock, I don't need an atomic update */

#if     MACH_LDEBUG
	/* perform lock statistics after drop to prevent delay */
	if (thread) {
		thread->mutex_count--;          /* lock statistic */
	}
#endif  /* MACH_LDEBUG */

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(lock, state);
}
1159 
1160 #define LCK_MTX_LCK_WAIT_CODE           0x20
1161 #define LCK_MTX_LCK_WAKEUP_CODE         0x21
1162 #define LCK_MTX_LCK_SPIN_CODE           0x22
1163 #define LCK_MTX_LCK_ACQUIRE_CODE        0x23
1164 #define LCK_MTX_LCK_DEMOTE_CODE         0x24
1165 
1166 /*
1167  * Routine:    lck_mtx_unlock_wakeup_tail
1168  *
1169  * Invoked on unlock when there is
1170  * contention, i.e. the assembly routine sees
1171  * that mutex->lck_mtx_waiters != 0
1172  *
 * neither the mutex nor the interlock is held
1174  *
1175  * Note that this routine might not be called if there are pending
1176  * waiters which have previously been woken up, and they didn't
1177  * end up boosting the old owner.
1178  *
1179  * assembly routine previously did the following to mutex:
1180  * (after saving the state in prior_lock_state)
1181  *      decremented lck_mtx_waiters if nonzero
1182  *
1183  * This function needs to be called as a tail call
1184  * to optimize the compiled code.
1185  */
__attribute__((noinline))
static void
lck_mtx_unlock_wakeup_tail(
	lck_mtx_t       *mutex,
	uint32_t        state)
{
	struct turnstile *ts;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	/* look up/prepare the turnstile associated with this mutex */
	ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/*
	 * Wake exactly one waiter.  WAITQ_UPDATE_INHERITOR transfers the
	 * turnstile inheritorship as part of the wakeup; since waiters are
	 * known to be queued (state has LCK_MTX_WAITERS_MSK), the wakeup
	 * must succeed.
	 */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(mutex),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/* account for the woken waiter and release interlock + spin bits */
	state -= LCK_MTX_WAITER;
	state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
	ordered_store_mtx_state_release(mutex, state);

	assert(current_thread()->turnstile != NULL);

	/* finish deferred turnstile bookkeeping now that the lock is released */
	turnstile_cleanup();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(mutex, state);
}
1222 
1223 /*
1224  * Routine:     lck_mtx_lock_acquire_x86
1225  *
 * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0).
1229  *
1230  * mutex is owned...  interlock is held... preemption is disabled
1231  */
__attribute__((always_inline))
static void
lck_mtx_lock_acquire_inline(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	if (mutex->lck_mtx_waiters > 0) {
		/*
		 * Waiters remain queued: become the turnstile inheritor so
		 * they push their priority on us while we hold the lock.
		 */
		if (ts == NULL) {
			ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
		}

		turnstile_update_inheritor(ts, current_thread(),
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	}

	/* ts was either passed in by the caller or prepared just above */
	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
1262 
1263 void
lck_mtx_lock_acquire_x86(lck_mtx_t * mutex)1264 lck_mtx_lock_acquire_x86(
1265 	lck_mtx_t       *mutex)
1266 {
1267 	return lck_mtx_lock_acquire_inline(mutex, NULL);
1268 }
1269 
1270 /*
1271  * Tail call helpers for lock functions that perform
1272  * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1273  * the caller's compiled code.
1274  */
1275 
__attribute__((noinline))
static void
lck_mtx_lock_acquire_tail(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	/* update inheritorship (completes ts if non-NULL), then drop interlock */
	lck_mtx_lock_acquire_inline(mutex, ts);
	lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex));
}
1285 
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
	lck_mtx_t       *mutex)
{
	/* update inheritorship, then finish the try-lock path */
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));

	/* try-lock succeeded by the time this tail is reached */
	return TRUE;
}
1296 
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
	lck_mtx_t       *mutex)
{
	/* update inheritorship, then finish the spin-to-mutex conversion */
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
1305 
/* Release only the interlock bit of the mutex state. */
static void
lck_mtx_ilk_unlock(
	lck_mtx_t       *mutex)
{
	lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
}
1312 
/*
 * Spin until the interlock (and xor_flags bits) are clear, then atomically
 * acquire the interlock, set xor_flags and clear and_flags in one exchange.
 * Returns with preemption disabled and *new_state holding the stored state.
 *
 * NOTE(review): despite its name, xor_flags is OR'd into the state (and also
 * waited on below); it behaves as "flags that must be clear, then set".
 */
static inline void
lck_mtx_interlock_lock_set_and_clear_flags(
	lck_mtx_t *mutex,
	uint32_t xor_flags,
	uint32_t and_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	for (;;) {
		/* have to wait for interlock to clear */
		while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
			cpu_pause();
			state = ordered_load_mtx_state(mutex);
		}
		prev = state;                                   /* prev contains snapshot for exchange */
		state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
		state &= ~and_flags;                            /* clear flags */

		/* preemption must be off before the interlock is visibly held */
		disable_preemption();
		if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
			break;
		}
		/* lost the race: back off and retry from a fresh snapshot */
		enable_preemption();
		cpu_pause();
		state = ordered_load_mtx_state(mutex);
	}
	*new_state = state;
	return;
}
1344 
1345 static inline void
lck_mtx_interlock_lock_clear_flags(lck_mtx_t * mutex,uint32_t and_flags,uint32_t * new_state)1346 lck_mtx_interlock_lock_clear_flags(
1347 	lck_mtx_t *mutex,
1348 	uint32_t and_flags,
1349 	uint32_t *new_state)
1350 {
1351 	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
1352 }
1353 
1354 static inline void
lck_mtx_interlock_lock(lck_mtx_t * mutex,uint32_t * new_state)1355 lck_mtx_interlock_lock(
1356 	lck_mtx_t *mutex,
1357 	uint32_t *new_state)
1358 {
1359 	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
1360 }
1361 
/*
 * One-shot attempt to acquire the interlock and set or_flags atomically.
 * Returns 1 with preemption disabled and *new_state updated on success;
 * returns 0 (preemption unchanged) if the interlock or or_flags bits are
 * already set, or if the cmpxchg loses a race.
 */
static inline int
lck_mtx_interlock_try_lock_set_flags(
	lck_mtx_t *mutex,
	uint32_t or_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	/* have to wait for interlock to clear */
	if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
		return 0;
	}
	prev = state;                                   /* prev contains snapshot for exchange */
	state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
	disable_preemption();
	if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
		*new_state = state;
		return 1;
	}

	/* lost the race: undo the preemption disable and report failure */
	enable_preemption();
	return 0;
}
1386 
/*
 * Routine:     lck_mtx_lock_contended
 *
 * Slow path for acquiring a contended mutex: adaptively spin-waits for
 * the owner via lck_mtx_lock_spinwait_x86 and, if the lock is still held
 * as a mutex, blocks in lck_mtx_lock_wait_x86 and retries.
 *
 * Called with the interlock not held; returns (via a tail helper) with
 * the mutex owned and the interlock released.
 */
__attribute__((noinline))
static void
lck_mtx_lock_contended(
	lck_mtx_t *lock,
	bool       profile,
	boolean_t *first_miss)
{
	lck_mtx_spinwait_ret_type_t ret;
	uint32_t state;
	thread_t thread;
	struct turnstile *ts = NULL;

try_again:

	if (profile) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, first_miss);
	}

	ret = lck_mtx_lock_spinwait_x86(lock);
	state = ordered_load_mtx_state(lock);
	switch (ret) {
	case LCK_MTX_SPINWAIT_NO_SPIN:
		/*
		 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
		 * try to spin.
		 */
		/* just fall through case LCK_MTX_SPINWAIT_SPUN */
		OS_FALLTHROUGH;
	case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
	case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
	case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
	case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
		/*
		 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
		 * interlock not held
		 */
		lck_mtx_interlock_lock(lock, &state);
		assert(state & LCK_MTX_ILOCKED_MSK);

		if (state & LCK_MTX_MLOCKED_MSK) {
			/* still held as a mutex: go to sleep and start over */
			if (profile) {
				LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
				    ret == LCK_MTX_SPINWAIT_NO_SPIN, first_miss);
			}
			lck_mtx_lock_wait_x86(lock, &ts);
			/*
			 * interlock is not held here.
			 */
			goto try_again;
		} else {
			/* grab the mutex */
			state |= LCK_MTX_MLOCKED_MSK;
			ordered_store_mtx_state_release(lock, state);
			thread = current_thread();
			ordered_store_mtx_owner(lock, thread->ctid);
#if     MACH_LDEBUG
			if (thread) {
				thread->mutex_count++;
			}
#endif  /* MACH_LDEBUG */
		}

		break;
	case LCK_MTX_SPINWAIT_ACQUIRED:
		/*
		 * mutex has been acquired by lck_mtx_lock_spinwait_x86
		 * interlock is held and preemption disabled
		 * owner is set and mutex marked as locked
		 * statistics updated too
		 */
		break;
	default:
		panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
	}

	/*
	 * interlock is already acquired here
	 */

	/* mutex has been acquired */
	if (state & LCK_MTX_WAITERS_MSK) {
		/*
		 * lck_mtx_lock_acquire_tail will call
		 * turnstile_complete.
		 */
		return lck_mtx_lock_acquire_tail(lock, ts);
	}

	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)lock, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	/* release the interlock */
	lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock));
}
1484 
1485 /*
1486  * Helper noinline functions for calling
1487  * panic to optimize compiled code.
1488  */
1489 
/* Abort path for lck_mtx_lock* on a destroyed mutex; never returns. */
__attribute__((noinline)) __abortlike
static void
lck_mtx_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
}
1497 
/* Abort path for lck_mtx_try_lock* on a destroyed mutex. */
__attribute__((noinline))
static boolean_t
lck_mtx_try_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
	/* NOTREACHED: return only satisfies the boolean_t signature */
	return FALSE;
}
1506 
/*
 * Spin until the interlock is released; returns TRUE with *new_state
 * updated once both interlock and mutex bits are clear, FALSE (caller
 * must block) as soon as the lock is observed held as a full mutex.
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
			*new_state = state;
			return TRUE;
		}
		if (state & LCK_MTX_MLOCKED_MSK) {
			/* if it is held as mutex, just fail */
			return FALSE;
		}
	}
}
1528 
/*
 * Try-lock variant: spin only while the interlock is held.  Returns TRUE
 * with *new_state updated when the interlock clears, FALSE as soon as the
 * lock is observed held as a mutex or a spinlock (try-lock must not wait).
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
			/* if it is held as mutex or spin, just fail */
			return FALSE;
		}
		if (!(state & LCK_MTX_ILOCKED_MSK)) {
			*new_state = state;
			return TRUE;
		}
	}
}
1550 
1551 /*
1552  * Routine:	lck_mtx_lock_slow
1553  *
1554  * Locks a mutex for current thread.
1555  * If the lock is contended this function might
1556  * sleep.
1557  *
1558  * Called with interlock not held.
1559  */
1560 __attribute__((noinline))
1561 void
lck_mtx_lock_slow(lck_mtx_t * lock)1562 lck_mtx_lock_slow(
1563 	lck_mtx_t       *lock)
1564 {
1565 	bool            profile;
1566 	uint32_t        state;
1567 	int             first_miss = 0;
1568 
1569 	state = ordered_load_mtx_state(lock);
1570 	profile = (state & LCK_MTX_PROFILE_MSK);
1571 
1572 	if (__improbable(profile)) {
1573 		if (state & LCK_MTX_SPIN_MSK) {
1574 			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1575 			assert(state & LCK_MTX_ILOCKED_MSK);
1576 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1577 		}
1578 	}
1579 
1580 	/* is the interlock or mutex held */
1581 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1582 		/*
1583 		 * Note: LCK_MTX_TAG_DESTROYED has
1584 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1585 		 */
1586 
1587 		/* is the mutex already held */
1588 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1589 			/* no, must have been the mutex */
1590 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1591 		}
1592 
1593 		/* check to see if it is marked destroyed */
1594 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1595 			lck_mtx_destroyed(lock);
1596 		}
1597 	}
1598 
1599 	/* no - can't be DESTROYED or locked */
1600 	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1601 		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1602 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1603 		}
1604 	}
1605 
1606 	/* lock and interlock acquired */
1607 
1608 	/* record owner of mutex */
1609 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1610 
1611 #if MACH_LDEBUG
1612 	if (thread) {
1613 		thread->mutex_count++;          /* lock statistic */
1614 	}
1615 #endif
1616 	/*
1617 	 * Check if there are waiters to
1618 	 * inherit their priority.
1619 	 */
1620 	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1621 		return lck_mtx_lock_acquire_tail(lock, NULL);
1622 	}
1623 
1624 	/* release the interlock */
1625 	lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1626 }
1627 
1628 __attribute__((noinline))
1629 boolean_t
lck_mtx_try_lock_slow(lck_mtx_t * lock)1630 lck_mtx_try_lock_slow(
1631 	lck_mtx_t       *lock)
1632 {
1633 	bool            profile;
1634 	uint32_t        state;
1635 	int             first_miss = 0;
1636 
1637 	state = ordered_load_mtx_state(lock);
1638 	profile = (state & LCK_MTX_PROFILE_MSK);
1639 
1640 	/* is the interlock or mutex held */
1641 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1642 		/*
1643 		 * Note: LCK_MTX_TAG_DESTROYED has
1644 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1645 		 */
1646 
1647 		/* is the mutex already held */
1648 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1649 			return FALSE;
1650 		}
1651 
1652 		/* check to see if it is marked destroyed */
1653 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1654 			lck_mtx_try_destroyed(lock);
1655 		}
1656 
1657 		if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1658 			if (__improbable(profile)) {
1659 				LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1660 			}
1661 			return FALSE;
1662 		}
1663 	}
1664 
1665 	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1666 		if (__improbable(profile)) {
1667 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1668 		}
1669 		return FALSE;
1670 	}
1671 
1672 	/* lock and interlock acquired */
1673 
1674 	/* record owner of mutex */
1675 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1676 
1677 #if MACH_LDEBUG
1678 	if (thread) {
1679 		thread->mutex_count++;          /* lock statistic */
1680 	}
1681 #endif
1682 	/*
1683 	 * Check if there are waiters to
1684 	 * inherit their priority.
1685 	 */
1686 	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1687 		return lck_mtx_try_lock_acquire_tail(lock);
1688 	}
1689 
1690 	/* release the interlock */
1691 	lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1692 
1693 	return TRUE;
1694 }
1695 
1696 __attribute__((noinline))
1697 void
lck_mtx_lock_spin_slow(lck_mtx_t * lock)1698 lck_mtx_lock_spin_slow(
1699 	lck_mtx_t       *lock)
1700 {
1701 	bool            profile;
1702 	uint32_t        state;
1703 	int             first_miss = 0;
1704 
1705 	state = ordered_load_mtx_state(lock);
1706 	profile = (state & LCK_MTX_PROFILE_MSK);
1707 
1708 	if (__improbable(profile)) {
1709 		if (state & LCK_MTX_SPIN_MSK) {
1710 			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1711 			assert(state & LCK_MTX_ILOCKED_MSK);
1712 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1713 		}
1714 	}
1715 
1716 	/* is interlock or mutex held */
1717 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1718 		/*
1719 		 * Note: LCK_MTX_TAG_DESTROYED has
1720 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1721 		 */
1722 
1723 		/* is the mutex already held */
1724 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1725 			/* no, must have been the mutex */
1726 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1727 		}
1728 
1729 		/* check to see if it is marked destroyed */
1730 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1731 			lck_mtx_destroyed(lock);
1732 		}
1733 	}
1734 
1735 	bool acquired = true;
1736 
1737 #if CONFIG_DTRACE
1738 	uint64_t spin_start;
1739 
1740 	if (__probable(lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1741 		goto past_spin;
1742 	}
1743 
1744 	spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
1745 #endif /* CONFIG_DTRACE */
1746 
1747 	/* note - can't be DESTROYED or locked */
1748 	/* note - preemption is not disabled while spinning */
1749 	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1750 		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1751 			acquired = false;
1752 			break;
1753 		}
1754 	}
1755 
1756 	LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
1757 #if CONFIG_DTRACE
1758 past_spin:
1759 #endif /* CONFIG_DTRACE */
1760 
1761 	if (__improbable(!acquired)) {
1762 		return lck_mtx_lock_contended(lock, profile, &first_miss);
1763 	}
1764 
1765 	/* lock as spinlock and interlock acquired */
1766 
1767 	/* record owner of mutex */
1768 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1769 
1770 #if MACH_LDEBUG
1771 	if (thread) {
1772 		thread->mutex_count++;          /* lock statistic */
1773 	}
1774 #endif
1775 	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp, true, profile);
1776 
1777 	/* return with the interlock held and preemption disabled */
1778 	return;
1779 }
1780 
1781 __attribute__((noinline))
1782 boolean_t
lck_mtx_try_lock_spin_slow(lck_mtx_t * lock)1783 lck_mtx_try_lock_spin_slow(
1784 	lck_mtx_t       *lock)
1785 {
1786 	bool            profile;
1787 	uint32_t        state;
1788 	int             first_miss = 0;
1789 
1790 	state = ordered_load_mtx_state(lock);
1791 	profile = (state & LCK_MTX_PROFILE_MSK);
1792 
1793 	/* is the interlock or mutex held */
1794 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1795 		/*
1796 		 * Note: LCK_MTX_TAG_DESTROYED has
1797 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1798 		 */
1799 
1800 		/* is the mutex already held */
1801 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1802 			return FALSE;
1803 		}
1804 
1805 		/* check to see if it is marked destroyed */
1806 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1807 			lck_mtx_try_destroyed(lock);
1808 		}
1809 	}
1810 
1811 	/* note - can't be DESTROYED or locked */
1812 	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1813 		if (__improbable(profile)) {
1814 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1815 		}
1816 		return FALSE;
1817 	}
1818 
1819 	/* lock and interlock acquired */
1820 
1821 	/* record owner of mutex */
1822 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1823 
1824 #if MACH_LDEBUG
1825 	if (thread) {
1826 		thread->mutex_count++;          /* lock statistic */
1827 	}
1828 #endif
1829 
1830 #if     CONFIG_DTRACE
1831 	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
1832 #endif
1833 	return TRUE;
1834 }
1835 
/*
 * Routine:     lck_mtx_convert_spin
 *
 * Converts a mutex held as a spinlock (interlock + spin bit, preemption
 * disabled) into a full mutex held by the current thread.  No-op if the
 * lock is already held as a mutex.
 */
__attribute__((noinline))
void
lck_mtx_convert_spin(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	assertf(lock->lck_mtx_owner == current_thread()->ctid,
	    "lock %p not owned by thread ctid %d (current owner %d)",
	    lock, current_thread()->ctid, lock->lck_mtx_owner);

	if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
		/* already owned as a mutex, just return */
		return;
	}

	/* must be held as a spinlock: preemption off, interlock + spin bit set */
	assert(get_preemption_level() > 0);
	assert(state & LCK_MTX_ILOCKED_MSK);
	assert(state & LCK_MTX_SPIN_MSK);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_convert_spin_acquire_tail(lock);
	}

	lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));

	return;
}
1870 
1871 static inline boolean_t
lck_mtx_lock_grab_mutex(lck_mtx_t * lock)1872 lck_mtx_lock_grab_mutex(
1873 	lck_mtx_t       *lock)
1874 {
1875 	uint32_t state;
1876 
1877 	state = ordered_load_mtx_state(lock);
1878 
1879 	if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
1880 		return FALSE;
1881 	}
1882 
1883 	/* lock and interlock acquired */
1884 
1885 	/* record owner of mutex */
1886 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1887 
1888 #if MACH_LDEBUG
1889 	if (thread) {
1890 		thread->mutex_count++;          /* lock statistic */
1891 	}
1892 #endif
1893 	return TRUE;
1894 }
1895 
1896 __attribute__((noinline))
1897 void
lck_mtx_assert(lck_mtx_t * lock,unsigned int type)1898 lck_mtx_assert(
1899 	lck_mtx_t       *lock,
1900 	unsigned int    type)
1901 {
1902 	thread_t thread;
1903 	uint32_t state;
1904 
1905 	thread = current_thread();
1906 	state = ordered_load_mtx_state(lock);
1907 
1908 	if (type == LCK_MTX_ASSERT_OWNED) {
1909 		if ((lock->lck_mtx_owner != thread->ctid) ||
1910 		    !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1911 			panic("mutex (%p) not owned", lock);
1912 		}
1913 	} else {
1914 		assert(type == LCK_MTX_ASSERT_NOTOWNED);
1915 		if (lock->lck_mtx_owner == thread->ctid) {
1916 			panic("mutex (%p) owned", lock);
1917 		}
1918 	}
1919 }
1920 
1921 /*
1922  * Routine:     lck_mtx_lock_spinwait_x86
1923  *
1924  * Invoked trying to acquire a mutex when there is contention but
1925  * the holder is running on another processor. We spin for up to a maximum
1926  * time waiting for the lock to be released.
1927  *
1928  * Called with the interlock unlocked.
1929  * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
1930  * returns LCK_MTX_SPINWAIT_SPUN if we spun
1931  * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
1932  */
1933 __attribute__((noinline))
1934 lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(lck_mtx_t * mutex)1935 lck_mtx_lock_spinwait_x86(
1936 	lck_mtx_t       *mutex)
1937 {
1938 	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1939 	ctid_t          owner, prev_owner;
1940 	uint64_t        window_deadline, sliding_deadline, high_deadline;
1941 	uint64_t        spin_start, start_time, cur_time, avg_hold_time, bias, delta;
1942 	lck_mtx_spinwait_ret_type_t             retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
1943 	int             loopcount = 0;
1944 	int             total_hold_time_samples, window_hold_time_samples, unfairness;
1945 	uint            prev_owner_cpu;
1946 	bool            adjust;
1947 
1948 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1949 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
1950 	    mutex->lck_mtx_waiters, 0, 0);
1951 
1952 	spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
1953 	start_time = mach_absolute_time();
1954 
1955 	/*
1956 	 * window_deadline represents the "learning" phase.
1957 	 * The thread collects statistics about the lock during
1958 	 * window_deadline and then it makes a decision on whether to spin more
1959 	 * or block according to the concurrency behavior
1960 	 * observed.
1961 	 *
1962 	 * Every thread can spin at least low_MutexSpin.
1963 	 */
1964 	window_deadline = start_time + low_MutexSpin;
1965 	/*
1966 	 * Sliding_deadline is the adjusted spin deadline
1967 	 * computed after the "learning" phase.
1968 	 */
1969 	sliding_deadline = window_deadline;
1970 	/*
1971 	 * High_deadline is a hard deadline. No thread
1972 	 * can spin more than this deadline.
1973 	 */
1974 	if (high_MutexSpin >= 0) {
1975 		high_deadline = start_time + high_MutexSpin;
1976 	} else {
1977 		high_deadline = start_time + low_MutexSpin * real_ncpus;
1978 	}
1979 
1980 	/*
1981 	 * Do not know yet which is the owner cpu.
1982 	 * Initialize prev_owner_cpu with next cpu.
1983 	 */
1984 	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1985 	total_hold_time_samples = 0;
1986 	window_hold_time_samples = 0;
1987 	avg_hold_time = 0;
1988 	adjust = TRUE;
1989 	bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
1990 
1991 	prev_owner = mutex->lck_mtx_owner;
1992 
1993 	/*
1994 	 * Spin while:
1995 	 *   - mutex is locked, and
1996 	 *   - it's locked as a spin lock, and
1997 	 *   - owner is running on another processor, and
1998 	 *   - we haven't spun for long enough.
1999 	 */
2000 	do {
2001 		/*
2002 		 * Try to acquire the lock.
2003 		 */
2004 		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
2005 			retval = LCK_MTX_SPINWAIT_ACQUIRED;
2006 			break;
2007 		}
2008 
2009 		cur_time = mach_absolute_time();
2010 
2011 		/*
2012 		 * Never spin past high_deadline.
2013 		 */
2014 		if (cur_time >= high_deadline) {
2015 			retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
2016 			break;
2017 		}
2018 
2019 		/*
2020 		 * Check if owner is on core. If not block.
2021 		 */
2022 		owner = mutex->lck_mtx_owner;
2023 		if (owner) {
2024 			uint32_t i = prev_owner_cpu;
2025 			bool     owner_on_core = false;
2026 
2027 			disable_preemption();
2028 			owner = mutex->lck_mtx_owner;
2029 
2030 			/*
2031 			 * For scalability we want to check if the owner is on core
2032 			 * without locking the mutex interlock.
2033 			 * If we do not lock the mutex interlock, the owner that we see might be
2034 			 * invalid, so we cannot dereference it. Therefore we cannot check
2035 			 * any field of the thread to tell us if it is on core.
2036 			 * Check if the thread that is running on the other cpus matches the owner.
2037 			 */
2038 			if (owner) {
2039 				thread_t owner_thread = ctid_get_thread_unsafe(owner);
2040 
2041 				do {
2042 					if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner_thread)) {
2043 						owner_on_core = true;
2044 						break;
2045 					}
2046 					if (++i >= real_ncpus) {
2047 						i = 0;
2048 					}
2049 				} while (i != prev_owner_cpu);
2050 			}
2051 
2052 			enable_preemption();
2053 
2054 			if (owner == 0) {
2055 				/* nothing to do, we didn't find an owner */
2056 			} else if (owner_on_core) {
2057 				prev_owner_cpu = i;
2058 			} else {
2059 				prev_owner = owner;
2060 				owner = mutex->lck_mtx_owner;
2061 				if (owner == prev_owner) {
2062 					/*
2063 					 * Owner is not on core.
2064 					 * Stop spinning.
2065 					 */
2066 					if (loopcount == 0) {
2067 						retval = LCK_MTX_SPINWAIT_NO_SPIN;
2068 					} else {
2069 						retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
2070 					}
2071 					break;
2072 				}
2073 				/*
2074 				 * Fall through if the owner changed while we were scanning.
2075 				 * The new owner could potentially be on core, so loop
2076 				 * again.
2077 				 */
2078 			}
2079 		}
2080 
2081 		/*
2082 		 * Save how many times we see the owner changing.
2083 		 * We can roughly estimate the mutex hold
2084 		 * time and the fairness with that.
2085 		 */
2086 		if (owner != prev_owner) {
2087 			prev_owner = owner;
2088 			total_hold_time_samples++;
2089 			window_hold_time_samples++;
2090 		}
2091 
2092 		/*
2093 		 * Learning window expired.
2094 		 * Try to adjust the sliding_deadline.
2095 		 */
2096 		if (cur_time >= window_deadline) {
2097 			/*
2098 			 * If there was not contention during the window
2099 			 * stop spinning.
2100 			 */
2101 			if (window_hold_time_samples < 1) {
2102 				retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
2103 				break;
2104 			}
2105 
2106 			if (adjust) {
2107 				/*
2108 				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2109 				 * but the lock is unfair, so let's try to estimate by how much.
2110 				 */
2111 				unfairness = total_hold_time_samples / real_ncpus;
2112 
2113 				if (unfairness == 0) {
2114 					/*
2115 					 * We observed the owner changing `total_hold_time_samples` times which
2116 					 * let us estimate the average hold time of this mutex for the duration
2117 					 * of the spin time.
2118 					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2119 					 *
2120 					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2121 					 */
2122 					delta = cur_time - start_time;
2123 					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2124 				} else {
2125 					/*
2126 					 * In this case at least one of the other cpus was able to get the lock twice
2127 					 * while I was spinning.
2128 					 * We could spin longer but it won't necessarily help if the system is unfair.
2129 					 * Try to randomize the wait to reduce contention.
2130 					 *
2131 					 * We compute how much time we could potentially spin
2132 					 * and distribute it over the cpus.
2133 					 *
2134 					 * bias is an integer between 0 and real_ncpus.
2135 					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2136 					 */
2137 					delta = high_deadline - cur_time;
2138 					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2139 					adjust = FALSE;
2140 				}
2141 			}
2142 
2143 			window_deadline += low_MutexSpin;
2144 			window_hold_time_samples = 0;
2145 		}
2146 
		/*
		 * Stop spinning once we are past
		 * the adjusted (sliding) deadline.
		 */
2151 		if (cur_time >= sliding_deadline) {
2152 			retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
2153 			break;
2154 		}
2155 
2156 		if (mutex->lck_mtx_owner != 0) {
2157 			cpu_pause();
2158 		}
2159 
2160 		loopcount++;
2161 	} while (TRUE);
2162 
2163 	LCK_MTX_ADAPTIVE_SPIN_END(mutex, mutex->lck_mtx_grp, spin_start);
2164 
2165 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2166 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
2167 	    mutex->lck_mtx_waiters, retval, 0);
2168 
2169 	return retval;
2170 }
2171 
2172 
2173 
2174 /*
2175  * Routine:     lck_mtx_lock_wait_x86
2176  *
2177  * Invoked in order to wait on contention.
2178  *
2179  * Called with the interlock locked and
2180  * preemption disabled...
2181  * returns it unlocked and with preemption enabled
2182  *
2183  * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
2184  *      A runnable waiter can exist between wait and acquire
2185  *      without a waiters count being set.
2186  *      This allows us to never make a spurious wakeup call.
2187  *
2188  * Priority:
2189  *      This avoids taking the thread lock if the owning thread is the same priority.
2190  *      This optimizes the case of same-priority threads contending on a lock.
2191  *      However, that allows the owning thread to drop in priority while holding the lock,
2192  *      because there is no state that the priority change can notice that
2193  *      says that the targeted thread holds a contended mutex.
2194  *
2195  *      One possible solution: priority changes could look for some atomic tag
2196  *      on the thread saying 'holding contended lock', and then set up a promotion.
2197  *      Needs a story for dropping that promotion - the last contended unlock
2198  *      has to notice that this has happened.
2199  */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
	lck_mtx_t       *mutex,
	struct turnstile **ts)
{
	thread_t self = current_thread();
	uint64_t sleep_start;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	/*
	 * Snapshot the current owner. The non-"_unsafe" accessor is used here
	 * because the interlock is held on entry (see header comment), so the
	 * owner cannot fully release the lock underneath us.
	 */
	thread_t holder = ctid_get_thread(mutex->lck_mtx_owner);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder),
	    mutex->lck_mtx_waiters, 0, 0);
	sleep_start = LCK_MTX_BLOCK_BEGIN();

	/*
	 * Advertise that a wakeup will be needed. Protected by the interlock,
	 * which we still hold (lck_mtx_waiters is 1:1 with a pending wakeup).
	 */
	mutex->lck_mtx_waiters++;

	/*
	 * lck_mtx_lock_wait_x86 might be called in a loop. Call prepare just once
	 * and reuse the same turnstile while looping; the matching
	 * turnstile_complete will be called by lck_mtx_lock_contended when
	 * finally acquiring the lock.
	 */
	if (*ts == NULL) {
		*ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	/* Push a priority promotion to the current owner (applied lazily). */
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	/*
	 * Queue this thread on the turnstile waitq BEFORE dropping the
	 * interlock, so an unlocking owner cannot miss us.
	 */
	waitq_assert_wait64(&turnstile->ts_waitq, LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(mutex);

	/* Flush the delayed inheritor update now that the interlock is dropped. */
	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	/*
	 * Back from the wait: re-read the owner with the unsafe accessor purely
	 * for tracing (no lock is held here, so it may not be dereference-safe).
	 */
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
	    mutex->lck_mtx_waiters, 0, 0);

	LCK_MTX_BLOCK_END(mutex, mutex->lck_mtx_grp, sleep_start);
}
2247 
2248 /*
2249  *      Routine: kdp_lck_mtx_lock_spin_is_acquired
2250  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2251  *      Returns: TRUE if lock is acquired.
2252  */
2253 boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t * lck)2254 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t     *lck)
2255 {
2256 	if (not_in_kdp) {
2257 		panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2258 	}
2259 
2260 	if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2261 		return TRUE;
2262 	}
2263 
2264 	return FALSE;
2265 }
2266 
2267 void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)2268 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2269 {
2270 	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2271 	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2272 	waitinfo->owner   = thread_tid(ctid_get_thread(mutex->lck_mtx_owner));
2273 }
2274 
2275 #endif /* LCK_MTX_USE_ARCH */
2276 #if CONFIG_PV_TICKET
2277 __startup_func
2278 void
lck_init_pv(void)2279 lck_init_pv(void)
2280 {
2281 	uint32_t pvtck = 1;
2282 	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
2283 	if (pvtck == 0) {
2284 		return;
2285 	}
2286 	has_lock_pv = cpuid_vmm_present() &&
2287 	    (cpuid_vmm_get_kvm_features() & CPUID_KVM_FEATURE_PV_UNHALT) != 0;
2288 }
2289 STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
2290 #endif
2291