xref: /xnu-8792.41.9/osfmk/i386/locks_i386.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/lock.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
59  *	Date:	1985
60  *
61  *	Locking primitives implementation
62  */
63 
64 #define LOCK_PRIVATE 1
65 
66 #include <mach_ldebug.h>
67 
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <string.h>
79 
80 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
81 #include <i386/tsc.h>
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
84 #include <i386/mp.h>
85 #include <machine/atomic.h>
86 #include <sys/kdebug.h>
87 #if LCK_MTX_USE_ARCH
88 #include <i386/locks_i386_inlines.h>
89 #endif /* LCK_MTX_USE_ARCH */
90 #include <kern/cpu_number.h>
91 #include <os/hash.h>
92 #include <i386/cpuid.h>
93 
94 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
95 
96 uint64_t _Atomic lock_panic_timeout = 0xf000000;  /* 251e6 TSC ticks */
97 
98 /* Forwards */
99 
100 #if     USLOCK_DEBUG
101 /*
102  *	Perform simple lock checks.
103  */
104 int     uslock_check = 1;
105 int     max_lock_loops  = 100000000;
106 decl_simple_lock_data(extern, printf_lock);
107 decl_simple_lock_data(extern, panic_lock);
108 #endif  /* USLOCK_DEBUG */
109 
110 extern unsigned int not_in_kdp;
111 
112 #if !LCK_GRP_USE_ARG
113 #define usimple_lock_nopreempt(lck, grp) \
114 	usimple_lock_nopreempt(lck)
115 #define usimple_lock_try_nopreempt(lck, grp) \
116 	usimple_lock_try_nopreempt(lck)
117 #endif
118 static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
119 static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);
120 
121 /*
122  *	We often want to know the addresses of the callers
123  *	of the various lock routines.  However, this information
124  *	is only used for debugging and statistics.
125  */
126 typedef void    *pc_t;
127 #define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
128 #define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
129 #if     ANY_LOCK_DEBUG
130 #define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
131 #define DECL_PC(pc)     pc_t pc;
132 #else   /* ANY_LOCK_DEBUG */
133 #define DECL_PC(pc)
134 #ifdef  lint
135 /*
136  *	Eliminate lint complaints about unused local pc variables.
137  */
138 #define OBTAIN_PC(pc)   ++pc
139 #else   /* lint */
140 #define OBTAIN_PC(pc)
141 #endif  /* lint */
142 #endif  /* USLOCK_DEBUG */
143 
144 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
145 
146 #if LCK_MTX_USE_ARCH
147 KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
148 #endif /* LCK_MTX_USE_ARCH */
149 
150 /*
151  * atomic exchange API is a low level abstraction of the operations
152  * to atomically read, modify, and write a pointer.  This abstraction works
153  * for both Intel and ARMv8.1 compare and exchange atomic instructions as
154  * well as the ARM exclusive instructions.
155  *
156  * atomic_exchange_begin() - begin exchange and retrieve current value
157  * atomic_exchange_complete() - conclude an exchange
158  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
159  */
160 uint32_t
atomic_exchange_begin32(uint32_t * target,uint32_t * previous,enum memory_order ord)161 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
162 {
163 	uint32_t        val;
164 
165 	(void)ord;                      // Memory order not used
166 	val = os_atomic_load(target, relaxed);
167 	*previous = val;
168 	return val;
169 }
170 
171 boolean_t
atomic_exchange_complete32(uint32_t * target,uint32_t previous,uint32_t newval,enum memory_order ord)172 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
173 {
174 	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
175 }
176 
void
atomic_exchange_abort(void)
{
	/*
	 * Nothing to undo on x86: atomic_exchange_begin32() performs a plain
	 * load and acquires no reservation (unlike ARM load-exclusive), so
	 * abandoning an exchange requires no cleanup.
	 */
}
181 
182 boolean_t
atomic_test_and_set32(uint32_t * target,uint32_t test_mask,uint32_t set_mask,enum memory_order ord,boolean_t wait)183 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
184 {
185 	uint32_t        value, prev;
186 
187 	for (;;) {
188 		value = atomic_exchange_begin32(target, &prev, ord);
189 		if (value & test_mask) {
190 			if (wait) {
191 				cpu_pause();
192 			} else {
193 				atomic_exchange_abort();
194 			}
195 			return FALSE;
196 		}
197 		value |= set_mask;
198 		if (atomic_exchange_complete32(target, prev, value, ord)) {
199 			return TRUE;
200 		}
201 	}
202 }
203 
204 /*
205  *	Portable lock package implementation of usimple_locks.
206  */
207 
208 #if     USLOCK_DEBUG
209 #define USLDBG(stmt)    stmt
210 void            usld_lock_init(usimple_lock_t, unsigned short);
211 void            usld_lock_pre(usimple_lock_t, pc_t);
212 void            usld_lock_post(usimple_lock_t, pc_t);
213 void            usld_unlock(usimple_lock_t, pc_t);
214 void            usld_lock_try_pre(usimple_lock_t, pc_t);
215 void            usld_lock_try_post(usimple_lock_t, pc_t);
216 int             usld_lock_common_checks(usimple_lock_t, char *);
217 #else   /* USLOCK_DEBUG */
218 #define USLDBG(stmt)
219 #endif  /* USLOCK_DEBUG */
220 
221 #if LCK_MTX_USE_ARCH
222 /*
223  * Forward definitions
224  */
225 
226 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state);
227 static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
228 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
229 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
230 static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
231 static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
232 #endif /* LCK_MTX_USE_ARCH */
233 
234 
235 /*
236  *      Routine:        lck_spin_alloc_init
237  */
238 lck_spin_t *
lck_spin_alloc_init(lck_grp_t * grp,lck_attr_t * attr)239 lck_spin_alloc_init(
240 	lck_grp_t       *grp,
241 	lck_attr_t      *attr)
242 {
243 	lck_spin_t *lck;
244 
245 	lck = zalloc(KT_LCK_SPIN);
246 	lck_spin_init(lck, grp, attr);
247 	return lck;
248 }
249 
250 /*
251  *      Routine:        lck_spin_free
252  */
253 void
lck_spin_free(lck_spin_t * lck,lck_grp_t * grp)254 lck_spin_free(
255 	lck_spin_t      *lck,
256 	lck_grp_t       *grp)
257 {
258 	lck_spin_destroy(lck, grp);
259 	zfree(KT_LCK_SPIN, lck);
260 }
261 
262 /*
263  *      Routine:        lck_spin_init
264  */
265 void
lck_spin_init(lck_spin_t * lck,lck_grp_t * grp,__unused lck_attr_t * attr)266 lck_spin_init(
267 	lck_spin_t      *lck,
268 	lck_grp_t       *grp,
269 	__unused lck_attr_t     *attr)
270 {
271 	usimple_lock_init((usimple_lock_t) lck, 0);
272 	if (grp) {
273 		lck_grp_reference(grp, &grp->lck_grp_spincnt);
274 	}
275 }
276 
277 /*
278  *      Routine:        lck_spin_destroy
279  */
280 void
lck_spin_destroy(lck_spin_t * lck,lck_grp_t * grp)281 lck_spin_destroy(
282 	lck_spin_t      *lck,
283 	lck_grp_t       *grp)
284 {
285 	if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
286 		return;
287 	}
288 	lck->interlock = LCK_SPIN_TAG_DESTROYED;
289 	if (grp) {
290 		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
291 	}
292 	return;
293 }
294 
295 /*
296  *      Routine:        lck_spin_lock
297  */
298 void
lck_spin_lock_grp(lck_spin_t * lck,lck_grp_t * grp)299 lck_spin_lock_grp(
300 	lck_spin_t      *lck,
301 	lck_grp_t       *grp)
302 {
303 #pragma unused(grp)
304 	usimple_lock((usimple_lock_t) lck, grp);
305 }
306 
307 void
lck_spin_lock(lck_spin_t * lck)308 lck_spin_lock(
309 	lck_spin_t      *lck)
310 {
311 	usimple_lock((usimple_lock_t) lck, NULL);
312 }
313 
314 void
lck_spin_lock_nopreempt(lck_spin_t * lck)315 lck_spin_lock_nopreempt(
316 	lck_spin_t      *lck)
317 {
318 	usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
319 }
320 
321 void
lck_spin_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)322 lck_spin_lock_nopreempt_grp(
323 	lck_spin_t      *lck,
324 	lck_grp_t       *grp)
325 {
326 #pragma unused(grp)
327 	usimple_lock_nopreempt((usimple_lock_t) lck, grp);
328 }
329 
330 /*
331  *      Routine:        lck_spin_unlock
332  */
333 void
lck_spin_unlock(lck_spin_t * lck)334 lck_spin_unlock(
335 	lck_spin_t      *lck)
336 {
337 	usimple_unlock((usimple_lock_t) lck);
338 }
339 
340 void
lck_spin_unlock_nopreempt(lck_spin_t * lck)341 lck_spin_unlock_nopreempt(
342 	lck_spin_t      *lck)
343 {
344 	usimple_unlock_nopreempt((usimple_lock_t) lck);
345 }
346 
347 boolean_t
lck_spin_try_lock_grp(lck_spin_t * lck,lck_grp_t * grp)348 lck_spin_try_lock_grp(
349 	lck_spin_t      *lck,
350 	lck_grp_t       *grp)
351 {
352 #pragma unused(grp)
353 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
354 #if     DEVELOPMENT || DEBUG
355 	if (lrval) {
356 		pltrace(FALSE);
357 	}
358 #endif
359 	return lrval;
360 }
361 
362 
363 /*
364  *      Routine:        lck_spin_try_lock
365  */
366 boolean_t
lck_spin_try_lock(lck_spin_t * lck)367 lck_spin_try_lock(
368 	lck_spin_t      *lck)
369 {
370 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
371 #if     DEVELOPMENT || DEBUG
372 	if (lrval) {
373 		pltrace(FALSE);
374 	}
375 #endif
376 	return lrval;
377 }
378 
379 int
lck_spin_try_lock_nopreempt(lck_spin_t * lck)380 lck_spin_try_lock_nopreempt(
381 	lck_spin_t      *lck)
382 {
383 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
384 #if     DEVELOPMENT || DEBUG
385 	if (lrval) {
386 		pltrace(FALSE);
387 	}
388 #endif
389 	return lrval;
390 }
391 
392 int
lck_spin_try_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)393 lck_spin_try_lock_nopreempt_grp(
394 	lck_spin_t      *lck,
395 	lck_grp_t       *grp)
396 {
397 #pragma unused(grp)
398 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
399 #if     DEVELOPMENT || DEBUG
400 	if (lrval) {
401 		pltrace(FALSE);
402 	}
403 #endif
404 	return lrval;
405 }
406 
407 /*
408  *	Routine:	lck_spin_assert
409  */
410 void
lck_spin_assert(lck_spin_t * lock,unsigned int type)411 lck_spin_assert(lck_spin_t *lock, unsigned int type)
412 {
413 	thread_t thread, holder;
414 	uintptr_t state;
415 
416 	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
417 		panic("lck_spin_assert(): invalid arg (%u)", type);
418 	}
419 
420 	state = lock->interlock;
421 	holder = (thread_t)state;
422 	thread = current_thread();
423 	if (type == LCK_ASSERT_OWNED) {
424 		if (__improbable(holder == THREAD_NULL)) {
425 			panic("Lock not owned %p = %lx", lock, state);
426 		}
427 		if (__improbable(holder != thread)) {
428 			panic("Lock not owned by current thread %p = %lx", lock, state);
429 		}
430 	} else if (type == LCK_ASSERT_NOTOWNED) {
431 		if (__improbable(holder != THREAD_NULL)) {
432 			if (holder == thread) {
433 				panic("Lock owned by current thread %p = %lx", lock, state);
434 			}
435 		}
436 	}
437 }
438 
439 /*
440  *      Routine: kdp_lck_spin_is_acquired
441  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
442  *      Returns: TRUE if lock is acquired.
443  */
444 boolean_t
kdp_lck_spin_is_acquired(lck_spin_t * lck)445 kdp_lck_spin_is_acquired(lck_spin_t *lck)
446 {
447 	if (not_in_kdp) {
448 		panic("panic: spinlock acquired check done outside of kernel debugger");
449 	}
450 	return (lck->interlock != 0)? TRUE : FALSE;
451 }
452 
453 /*
454  *	Initialize a usimple_lock.
455  *
456  *	No change in preemption state.
457  */
void
usimple_lock_init(
	usimple_lock_t  l,
	__unused unsigned short tag)
{
	/* Debug bookkeeping first (USLOCK_DEBUG builds only), then the HW lock. */
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->interlock);
}
466 
/*
 * Timeout handler invoked by the hw_spin machinery once an acquisition
 * attempt has spun past the configured timeout: either keep spinning (if
 * machine timeouts are administratively suspended) or panic with
 * diagnostics about the presumed owner.
 */
static hw_spin_timeout_status_t
usimple_lock_acquire_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	usimple_lock_t l = _lock;
	uintptr_t lowner;
	lck_spinlock_to_info_t lsti;

	/* e.g. under a debugger: wall-clock stalls must not trigger panics */
	if (machine_timeout_suspended()) {
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	/* Snapshot the raw lock word (encodes the owner) for the report. */
	lowner = (uintptr_t)l->interlock.lock_data;
	lsti = lck_spinlock_timeout_hit(l, lowner);

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT " ;"
	    "lock owner thread=0x%lx, current_thread: %p, "
	    "lock owner active on CPU %d, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    l, HW_SPIN_TIMEOUT_ARG(to, st),
	    lowner, current_thread(), lsti->owner_cpu,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
489 
/*
 * Spin policy for usimple_locks: spin up to LockTimeOutTSC, then report
 * via usimple_lock_acquire_timeout_panic().
 */
static const struct hw_spin_policy usimple_lock_spin_policy = {
	.hwsp_name              = "usimple_lock_t",
	.hwsp_timeout           = &LockTimeOutTSC,
	.hwsp_op_timeout        = usimple_lock_acquire_timeout_panic,
};
495 
496 /*
497  *	Acquire a usimple_lock.
498  *
499  *	Returns with preemption disabled.  Note
500  *	that the hw_lock routines are responsible for
501  *	maintaining preemption state.
502  */
void
(usimple_lock)(   /* parenthesized to defeat any function-like macro */
	usimple_lock_t  l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	/* Spin under the timeout policy; hw_lock_to disables preemption. */
	(void)hw_lock_to(&l->interlock, &usimple_lock_spin_policy, grp);
#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
523 
524 /*
525  *	Acquire a usimple_lock_nopreempt
526  *
527  *	Called and returns with preemption disabled.  Note
528  *	that the hw_lock routines are responsible for
529  *	maintaining preemption state.
530  */
static void
usimple_lock_nopreempt(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_pre(l, pc));

	/* Caller already disabled preemption; this variant leaves it alone. */
	(void)hw_lock_to_nopreempt(&l->interlock, &usimple_lock_spin_policy, grp);

#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
552 
553 
554 /*
555  *	Release a usimple_lock.
556  *
557  *	Returns with preemption enabled.  Note
558  *	that the hw_lock routines are responsible for
559  *	maintaining preemption state.
560  */
void
usimple_unlock(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	/* Debug checks run while the hardware lock is still held. */
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock(&l->interlock);
}
574 
575 /*
576  *	Release a usimple_unlock_nopreempt.
577  *
578  *	Called and returns with preemption enabled.  Note
579  *	that the hw_lock routines are responsible for
580  *	maintaining preemption state.
581  */
void
usimple_unlock_nopreempt(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	/* Debug checks run while the hardware lock is still held. */
	USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	/* NOTE(review): _nopreempt variant leaves preemption state to the
	 * caller, despite the banner above saying "returns with preemption
	 * enabled" — confirm against hw_lock_unlock_nopreempt(). */
	hw_lock_unlock_nopreempt(&l->interlock);
}
595 
596 /*
597  *	Conditionally acquire a usimple_lock.
598  *
599  *	On success, returns with preemption disabled.
600  *	On failure, returns with preemption in the same state
601  *	as when first invoked.  Note that the hw_lock routines
602  *	are responsible for maintaining preemption state.
603  *
604  *	XXX No stats are gathered on a miss; I preserved this
605  *	behavior from the original assembly-language code, but
606  *	doesn't it make sense to log misses?  XXX
607  */
unsigned int
usimple_lock_try(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	unsigned int    success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	/* Post-checks and tracing only when the lock was actually acquired. */
	if ((success = hw_lock_try(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}
626 
627 /*
628  *	Conditionally acquire a usimple_lock.
629  *
630  *	Called and returns with preemption disabled.  Note
631  *	that the hw_lock routines are responsible for
632  *	maintaining preemption state.
633  *
634  *	XXX No stats are gathered on a miss; I preserved this
635  *	behavior from the original assembly-language code, but
636  *	doesn't it make sense to log misses?  XXX
637  */
static unsigned int
usimple_lock_try_nopreempt(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	unsigned int    success;
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_lock_try_pre(l, pc));
	/* Post-checks and tracing only when the lock was actually acquired. */
	if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
#if DEVELOPMENT || DEBUG
		pltrace(FALSE);
#endif
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
}
656 
657 /*
658  * Acquire a usimple_lock while polling for pending cpu signals
659  * and spinning on a lock.
660  *
661  */
662 unsigned
663 int
664 (usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
665     uint64_t deadline
666     LCK_GRP_ARG(lck_grp_t *grp))
667 {
668 	boolean_t istate = ml_get_interrupts_enabled();
669 
670 	if (deadline < mach_absolute_time()) {
671 		return 0;
672 	}
673 
674 	while (!simple_lock_try(l, grp)) {
675 		if (!istate) {
676 			cpu_signal_handler(NULL);
677 		}
678 
679 		if (deadline < mach_absolute_time()) {
680 			return 0;
681 		}
682 
683 		cpu_pause();
684 	}
685 
686 	return 1;
687 }
688 
689 void
690 (usimple_lock_try_lock_loop)(usimple_lock_t l
691     LCK_GRP_ARG(lck_grp_t *grp))
692 {
693 	/* When the lock is not contended, grab the lock and go. */
694 	if (!simple_lock_try(l, grp)) {
695 		usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
696 	}
697 }
698 
699 unsigned
700 int
701 (usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
702     uint64_t duration
703     LCK_GRP_ARG(lck_grp_t *grp))
704 {
705 	uint64_t deadline;
706 	uint64_t base_at;
707 	uint64_t duration_at;
708 
709 	/* Fast track for uncontended locks */
710 	if (simple_lock_try(l, grp)) {
711 		return 1;
712 	}
713 
714 	base_at = mach_absolute_time();
715 
716 	nanoseconds_to_absolutetime(duration, &duration_at);
717 	deadline = base_at + duration_at;
718 	if (deadline < base_at) {
719 		/* deadline has overflowed, make it saturate */
720 		deadline = ULLONG_MAX;
721 	}
722 
723 	return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
724 }
725 
726 #if     USLOCK_DEBUG
727 /*
728  *	States of a usimple_lock.  The default when initializing
729  *	a usimple_lock is setting it up for debug checking.
730  */
731 #define USLOCK_CHECKED          0x0001          /* lock is being checked */
732 #define USLOCK_TAKEN            0x0002          /* lock has been taken */
733 #define USLOCK_INIT             0xBAA0          /* lock has been initialized */
734 #define USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
735 #define USLOCK_CHECKING(l)      (uslock_check &&                        \
736 	                         ((l)->debug.state & USLOCK_CHECKED))
737 
738 /*
739  *	Initialize the debugging information contained
740  *	in a usimple_lock.
741  */
742 void
usld_lock_init(usimple_lock_t l,__unused unsigned short tag)743 usld_lock_init(
744 	usimple_lock_t  l,
745 	__unused unsigned short tag)
746 {
747 	if (l == USIMPLE_LOCK_NULL) {
748 		panic("lock initialization:  null lock pointer");
749 	}
750 	l->lock_type = USLOCK_TAG;
751 	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
752 	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
753 	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
754 	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
755 	l->debug.duration[0] = l->debug.duration[1] = 0;
756 	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
757 	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
758 	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
759 }
760 
761 
762 /*
763  *	These checks apply to all usimple_locks, not just
764  *	those with USLOCK_CHECKED turned on.
765  */
int
usld_lock_common_checks(
	usimple_lock_t  l,
	char            *caller)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("%s:  null lock pointer", caller);
	}
	/* Catch pointers to things that were never usimple_locks. */
	if (l->lock_type != USLOCK_TAG) {
		panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	}
	/* Catch locks that were never run through usld_lock_init(). */
	if (!(l->debug.state & USLOCK_INIT)) {
		panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	}
	/* Nonzero iff per-lock checking is enabled for this lock. */
	return USLOCK_CHECKING(l);
}
782 
783 
784 /*
785  *	Debug checks on a usimple_lock just before attempting
786  *	to acquire it.
787  */
788 /* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t  l,
	pc_t            pc)
{
	char    caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

/*
 *	Note that we have a weird case where we are getting a lock when we are
 *	in the process of putting the system to sleep. We are running with no
 *	current threads, therefore we can't tell if we are trying to retake a lock
 *	we have or someone on the other processor has it.  Therefore we just
 *	ignore this test if the locking thread is 0.
 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		    caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		    l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	/* NOTE(review): this disable/enable pair has no lasting effect —
	 * looks like a vestigial preemption point; confirm intent. */
	mp_disable_preemption();
	mp_enable_preemption();
}
820 
821 
822 /*
823  *	Debug checks on a usimple_lock just after acquiring it.
824  *
825  *	Pre-emption has been disabled at this point,
826  *	so we are safe in using cpu_number.
827  */
void
usld_lock_post(
	usimple_lock_t  l,
	pc_t            pc)
{
	unsigned int mycpu;
	char    caller[] = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/* State must still be exactly "initialized", modulo the TAKEN bit. */
	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s:  lock %p became uninitialized",
		    caller, l);
	}
	/* We just acquired it; nobody else may have set TAKEN meanwhile. */
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s:  lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = (unsigned int)cpu_number();
	assert(mycpu <= UCHAR_MAX);     /* lock_cpu is stored as unsigned char */

	/* Record ownership: thread, TAKEN state, acquisition PC and CPU. */
	l->debug.lock_thread = (void *)current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}
858 
859 
860 /*
861  *	Debug checks on a usimple_lock just before
862  *	releasing it.  Note that the caller has not
863  *	yet released the hardware lock.
864  *
865  *	Preemption is still disabled, so there's
866  *	no problem using cpu_number.
867  */
868 void
usld_unlock(usimple_lock_t l,pc_t pc)869 usld_unlock(
870 	usimple_lock_t  l,
871 	pc_t            pc)
872 {
873 	unsigned int mycpu;
874 	char    caller[] = "usimple_unlock";
875 
876 
877 	if (!usld_lock_common_checks(l, caller)) {
878 		return;
879 	}
880 
881 	mycpu = cpu_number();
882 	assert(mycpu <= UCHAR_MAX);
883 
884 	if (!(l->debug.state & USLOCK_TAKEN)) {
885 		panic("%s:  lock 0x%p hasn't been taken",
886 		    caller, l);
887 	}
888 	if (l->debug.lock_thread != (void *) current_thread()) {
889 		panic("%s:  unlocking lock 0x%p, owned by thread %p",
890 		    caller, l, l->debug.lock_thread);
891 	}
892 	if (l->debug.lock_cpu != mycpu) {
893 		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
894 		    caller, l, mycpu);
895 		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
896 		panic("%s", caller);
897 	}
898 
899 	l->debug.unlock_thread = l->debug.lock_thread;
900 	l->debug.lock_thread = INVALID_PC;
901 	l->debug.state &= ~USLOCK_TAKEN;
902 	l->debug.unlock_pc = pc;
903 	l->debug.unlock_cpu = (unsigned char)mycpu;
904 }
905 
906 
907 /*
908  *	Debug checks on a usimple_lock just before
909  *	attempting to acquire it.
910  *
911  *	Preemption isn't guaranteed to be disabled.
912  */
913 void
usld_lock_try_pre(usimple_lock_t l,__unused pc_t pc)914 usld_lock_try_pre(
915 	usimple_lock_t  l,
916 	__unused pc_t   pc)
917 {
918 	char    caller[] = "usimple_lock_try";
919 
920 	if (!usld_lock_common_checks(l, caller)) {
921 		return;
922 	}
923 }
924 
925 
926 /*
927  *	Debug checks on a usimple_lock just after
928  *	successfully attempting to acquire it.
929  *
930  *	Preemption has been disabled by the
931  *	lock acquisition attempt, so it's safe
932  *	to use cpu_number.
933  */
void
usld_lock_try_post(
	usimple_lock_t  l,
	pc_t            pc)
{
	unsigned int mycpu;
	char    caller[] = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

	/* State must still be exactly "initialized", modulo the TAKEN bit. */
	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
		panic("%s:  lock 0x%p became uninitialized",
		    caller, l);
	}
	/* We just acquired it; nobody else may have set TAKEN meanwhile. */
	if ((l->debug.state & USLOCK_TAKEN)) {
		panic("%s:  lock 0x%p became TAKEN by someone else",
		    caller, l);
	}

	mycpu = cpu_number();
	assert(mycpu <= UCHAR_MAX);     /* lock_cpu is stored as unsigned char */

	/* Record ownership: thread, TAKEN state, acquisition PC and CPU. */
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = (unsigned char)mycpu;
}
963 #endif  /* USLOCK_DEBUG */
964 #if LCK_MTX_USE_ARCH
965 
966 /*
967  * Slow path routines for lck_mtx locking and unlocking functions.
968  *
969  * These functions were previously implemented in x86 assembly,
970  * and some optimizations are in place in this c code to obtain a compiled code
971  * as performant and compact as the assembly version.
972  *
973  * To avoid to inline these functions on the fast path, all functions directly called by
974  * the fast paths have the __attribute__((noinline)) specified. Also they are all implemented
975  * in such a way the fast path can tail call into them. In this way the return address
976  * does not need to be pushed on the caller stack and stack optimization can happen on the caller.
977  *
978  * Slow path code is structured in such a way there are no calls to functions that will return
979  * on the context of the caller function, i.e. all functions called are or tail call functions
980  * or inline functions. The number of arguments of the tail call functions are less then six,
981  * so that they can be passed over registers and do not need to be pushed on stack.
982  * This allows the compiler to not create a stack frame for the functions.
983  *
984  * __improbable and __probable are used to compile the slow path code in such a way
985  * the fast path case will be on a sequence of instructions with as less jumps as possible,
986  * to make this case the most optimized even if falling through the slow path.
987  */
988 
989 /*
990  * Intel lock invariants:
991  *
992  * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
993  *
994  * The lock owner is promoted to the max priority of all its waiters only if it
995  * was a lower priority when it acquired or was an owner when a waiter waited.
996  * Max priority is capped at MAXPRI_PROMOTE.
997  *
998  * The last waiter will not be promoted as it is woken up, but the last
999  * lock owner may not have been the last thread to have been woken up depending on the
1000  * luck of the draw.  Therefore a last-owner may still have the promoted-on-wakeup
1001  * flag set.
1002  *
1003  * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
1004  *       priority from dropping priority in the future without having to take thread lock
1005  *       on acquire.
1006  */
1007 
1008 /*
1009  *      Routine:        lck_mtx_alloc_init
1010  */
1011 lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t * grp,lck_attr_t * attr)1012 lck_mtx_alloc_init(
1013 	lck_grp_t       *grp,
1014 	lck_attr_t      *attr)
1015 {
1016 	lck_mtx_t *lck;
1017 
1018 	lck = zalloc(KT_LCK_MTX);
1019 	lck_mtx_init(lck, grp, attr);
1020 	return lck;
1021 }
1022 
1023 /*
1024  *      Routine:        lck_mtx_free
1025  */
1026 void
lck_mtx_free(lck_mtx_t * lck,lck_grp_t * grp)1027 lck_mtx_free(
1028 	lck_mtx_t       *lck,
1029 	lck_grp_t       *grp)
1030 {
1031 	lck_mtx_destroy(lck, grp);
1032 	zfree(KT_LCK_MTX, lck);
1033 }
1034 
1035 /*
1036  *      Routine:        lck_mtx_init
1037  */
void
lck_mtx_init(
	lck_mtx_t       *lck,
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	/* A NULL attribute means "use the system default attributes". */
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}

	/* Zero the mutex and stamp it with its group's attribute id. */
	*lck = (lck_mtx_t){
		.lck_mtx_grp = grp->lck_grp_attr_id,
	};

	/* Enable profiling if requested by either the attr or the group. */
	if (__improbable((attr->lck_attr_val & LCK_ATTR_DEBUG) ||
	    (grp->lck_grp_attr_id & LCK_GRP_ATTR_DEBUG))) {
		lck->lck_mtx_profile = 1;
	}

	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
}
1059 
/*
 *      Routine:        lck_mtx_destroy
 *
 *      Mark the mutex as destroyed and drop the group reference taken
 *      at lck_mtx_init() time.  Idempotent: destroying an already
 *      destroyed mutex is a no-op.
 */
void
lck_mtx_destroy(
	lck_mtx_t       *lck,
	lck_grp_t       *grp)
{
	/* second destroy is a no-op */
	if (lck->lck_mtx_state == LCK_MTX_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/* release-store: prior accesses become visible before the destroyed tag */
	ordered_store_mtx_state_release(lck, LCK_MTX_TAG_DESTROYED);
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
1077 
1078 
#if DEVELOPMENT | DEBUG
/*
 * Panic helper for lck_mtx_unlock_slow(): invoked when the unlocking
 * thread's ctid does not match the recorded mutex owner.  noinline
 * keeps the panic path out of the unlock fast path.
 */
__attribute__((noinline))
void
lck_mtx_owner_check_panic(
	lck_mtx_t       *lock)
{
	/* unvalidated ctid->thread lookup; acceptable for a panic message */
	thread_t owner = ctid_get_thread_unsafe(lock->lck_mtx_owner);
	panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
#endif
1089 
/*
 * Routine:     lck_mtx_unlock_slow
 *
 * Unlocks a mutex held by current thread.
 *
 * It will wake up waiters if necessary.
 *
 * Interlock can be held.
 */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
	lck_mtx_t       *lock)
{
	thread_t        thread;
	uint32_t        state;

	state = ordered_load_mtx_state(lock);

	thread = current_thread();

#if DEVELOPMENT | DEBUG
	/* only the owner (identified by ctid) may unlock */
	if (__improbable(lock->lck_mtx_owner != thread->ctid)) {
		lck_mtx_owner_check_panic(lock);
	}
#endif

	/* check if it is held as a spinlock */
	if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
		/* held as a spinlock: the interlock is already held, skip taking it */
		goto unlock;
	}

	/* held as a full mutex: take the interlock and clear the mutex-locked bit */
	lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
	/* preemption disabled, interlock held and mutex not held */

	/* clear owner */
	ordered_store_mtx_owner(lock, 0);

	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if     MACH_LDEBUG
		if (thread) {
			thread->mutex_count--;
		}
#endif
		/* tail call: wakes one waiter, then drops the interlock */
		return lck_mtx_unlock_wakeup_tail(lock, state);
	}

	/* release interlock, promotion and clear spin flag */
	state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
	ordered_store_mtx_state_release(lock, state);           /* since I own the interlock, I don't need an atomic update */

#if     MACH_LDEBUG
	/* perform lock statistics after drop to prevent delay */
	if (thread) {
		thread->mutex_count--;          /* lock statistic */
	}
#endif  /* MACH_LDEBUG */

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(lock, state);
}
1153 
1154 #define LCK_MTX_LCK_WAIT_CODE           0x20
1155 #define LCK_MTX_LCK_WAKEUP_CODE         0x21
1156 #define LCK_MTX_LCK_SPIN_CODE           0x22
1157 #define LCK_MTX_LCK_ACQUIRE_CODE        0x23
1158 #define LCK_MTX_LCK_DEMOTE_CODE         0x24
1159 
/*
 * Routine:    lck_mtx_unlock_wakeup_tail
 *
 * Invoked on unlock when there is
 * contention, i.e. the assembly routine sees
 * that mutex->lck_mtx_waiters != 0
 *
 * neither the mutex nor the interlock is held
 *
 * Note that this routine might not be called if there are pending
 * waiters which have previously been woken up, and they didn't
 * end up boosting the old owner.
 *
 * assembly routine previously did the following to mutex:
 * (after saving the state in prior_lock_state)
 *      decremented lck_mtx_waiters if nonzero
 *
 * This function needs to be called as a tail call
 * to optimize the compiled code.
 */
__attribute__((noinline))
static void
lck_mtx_unlock_wakeup_tail(
	lck_mtx_t       *mutex,
	uint32_t        state)
{
	struct turnstile *ts;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	/* look up (or create) the turnstile associated with this mutex */
	ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/* wake exactly one waiter; there must be one since WAITERS_MSK was set */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(mutex),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/* drop one waiter count, then release interlock/spin in one store */
	state -= LCK_MTX_WAITER;
	state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
	ordered_store_mtx_state_release(mutex, state);

	assert(current_thread()->turnstile != NULL);

	turnstile_cleanup();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	lck_mtx_unlock_finish_inline(mutex, state);
}
1216 
/*
 * Routine:     lck_mtx_lock_acquire_x86
 *
 * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0)
 *
 * mutex is owned...  interlock is held... preemption is disabled
 */
__attribute__((always_inline))
static void
lck_mtx_lock_acquire_inline(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	/* if waiters remain, make this thread the turnstile inheritor */
	if (mutex->lck_mtx_waiters > 0) {
		if (ts == NULL) {
			/* caller did not pass a prepared turnstile; prepare one now */
			ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
		}

		turnstile_update_inheritor(ts, current_thread(),
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	}

	/* complete the turnstile whether it came from the caller or from above */
	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
1256 
/*
 * External entry point for the contended-acquire bookkeeping above,
 * with no turnstile pre-prepared by the caller.
 */
void
lck_mtx_lock_acquire_x86(
	lck_mtx_t       *mutex)
{
	/* tail call of a void function: deliberate, keeps the frame small */
	return lck_mtx_lock_acquire_inline(mutex, NULL);
}
1263 
1264 /*
1265  * Tail call helpers for lock functions that perform
1266  * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1267  * the caller's compiled code.
1268  */
1269 
/*
 * lck_mtx_lock() tail: perform the waiter/turnstile bookkeeping, then
 * drop the interlock and re-enable preemption.  ts may be NULL, or a
 * turnstile prepared by lck_mtx_lock_wait_x86() that must be completed.
 */
__attribute__((noinline))
static void
lck_mtx_lock_acquire_tail(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	lck_mtx_lock_acquire_inline(mutex, ts);
	lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex));
}
1279 
/*
 * lck_mtx_try_lock() tail: waiter/turnstile bookkeeping, then release
 * the interlock.  Always returns TRUE — by the time this is called the
 * mutex has been acquired.
 */
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
	lck_mtx_t       *mutex)
{
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));

	return TRUE;
}
1290 
/*
 * lck_mtx_convert_spin() tail: waiter/turnstile bookkeeping, then the
 * spin-to-mutex conversion finish (which releases the interlock).
 */
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
	lck_mtx_t       *mutex)
{
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
1299 
/* Release only the mutex interlock, leaving any mutex/spin hold intact. */
static void
lck_mtx_ilk_unlock(
	lck_mtx_t       *mutex)
{
	lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
}
1306 
/*
 * Spin until the interlock and every bit in xor_flags are clear, then in
 * a single acquire cmpxchg: take the interlock, set the xor_flags bits
 * (they were observed clear, so OR here behaves as XOR) and clear the
 * and_flags bits.  Returns with preemption disabled and *new_state
 * holding the state that was stored.
 */
static inline void
lck_mtx_interlock_lock_set_and_clear_flags(
	lck_mtx_t *mutex,
	uint32_t xor_flags,
	uint32_t and_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	for (;;) {
		/* have to wait for interlock to clear */
		while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
			cpu_pause();
			state = ordered_load_mtx_state(mutex);
		}
		prev = state;                                   /* prev contains snapshot for exchange */
		state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
		state &= ~and_flags;                            /* clear flags */

		/* preemption stays disabled only if the exchange succeeds */
		disable_preemption();
		if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
			break;
		}
		enable_preemption();
		cpu_pause();
		state = ordered_load_mtx_state(mutex);
	}
	*new_state = state;
	return;
}
1338 
/* Take the interlock, clearing and_flags; spins until available. */
static inline void
lck_mtx_interlock_lock_clear_flags(
	lck_mtx_t *mutex,
	uint32_t and_flags,
	uint32_t *new_state)
{
	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
}
1347 
/* Take the interlock with no flag changes; spins until available. */
static inline void
lck_mtx_interlock_lock(
	lck_mtx_t *mutex,
	uint32_t *new_state)
{
	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
}
1355 
/*
 * One-shot attempt to take the interlock and set or_flags in the same
 * acquire cmpxchg.  Returns 0 (without disabling preemption) if the
 * interlock or any or_flags bit is already set, or if the exchange
 * loses a race.  Returns 1 on success with preemption left disabled
 * and *new_state updated to the stored state.
 */
static inline int
lck_mtx_interlock_try_lock_set_flags(
	lck_mtx_t *mutex,
	uint32_t or_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	/* have to wait for interlock to clear */
	if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
		return 0;
	}
	prev = state;                                   /* prev contains snapshot for exchange */
	state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
	disable_preemption();
	if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
		*new_state = state;
		return 1;
	}

	enable_preemption();
	return 0;
}
1380 
/*
 * Contended-acquire path: adaptively spin-wait for the owner, then
 * either block on the mutex's turnstile or grab the mutex directly
 * once it is free.  Loops (try_again) each time it is woken without
 * having acquired the lock.  Returns with the mutex held and the
 * interlock released.
 */
__attribute__((noinline))
static void
lck_mtx_lock_contended(
	lck_mtx_t *lock,
	bool       profile,
	boolean_t *first_miss)
{
	lck_mtx_spinwait_ret_type_t ret;
	uint32_t state;
	thread_t thread;
	struct turnstile *ts = NULL;

try_again:

	if (profile) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, first_miss);
	}

	ret = lck_mtx_lock_spinwait_x86(lock);
	state = ordered_load_mtx_state(lock);
	switch (ret) {
	case LCK_MTX_SPINWAIT_NO_SPIN:
		/*
		 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
		 * try to spin.
		 */
		/* just fall through case LCK_MTX_SPINWAIT_SPUN */
		OS_FALLTHROUGH;
	case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
	case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
	case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
	case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
		/*
		 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
		 * interlock not held
		 */
		lck_mtx_interlock_lock(lock, &state);
		assert(state & LCK_MTX_ILOCKED_MSK);

		if (state & LCK_MTX_MLOCKED_MSK) {
			/* still owned by someone else: block on the turnstile */
			if (profile) {
				LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
				    ret == LCK_MTX_SPINWAIT_NO_SPIN, first_miss);
			}
			lck_mtx_lock_wait_x86(lock, &ts);
			/*
			 * interlock is not held here.
			 */
			goto try_again;
		} else {
			/* grab the mutex */
			state |= LCK_MTX_MLOCKED_MSK;
			ordered_store_mtx_state_release(lock, state);
			thread = current_thread();
			ordered_store_mtx_owner(lock, thread->ctid);
#if     MACH_LDEBUG
			if (thread) {
				thread->mutex_count++;
			}
#endif  /* MACH_LDEBUG */
		}

		break;
	case LCK_MTX_SPINWAIT_ACQUIRED:
		/*
		 * mutex has been acquired by lck_mtx_lock_spinwait_x86
		 * interlock is held and preemption disabled
		 * owner is set and mutex marked as locked
		 * statistics updated too
		 */
		break;
	default:
		panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
	}

	/*
	 * interlock is already acquired here
	 */

	/* mutex has been acquired */
	if (state & LCK_MTX_WAITERS_MSK) {
		/*
		 * lck_mtx_lock_acquire_tail will call
		 * turnstile_complete.
		 */
		return lck_mtx_lock_acquire_tail(lock, ts);
	}

	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)lock, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	/* release the interlock */
	lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock));
}
1478 
1479 /*
1480  * Helper noinline functions for calling
1481  * panic to optimize compiled code.
1482  */
1483 
/* Panic helper: a lock operation found the mutex tagged destroyed. */
__attribute__((noinline)) __abortlike
static void
lck_mtx_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
}
1491 
/*
 * Panic helper for the try-lock paths; the return value is never
 * reached (panic does not return) but satisfies the boolean_t shape.
 */
__attribute__((noinline))
static boolean_t
lck_mtx_try_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
	return FALSE;
}
1500 
/*
 * Spin until both the interlock and the mutex-locked bit are clear.
 * Returns TRUE with *new_state updated once both are clear; returns
 * FALSE as soon as the lock is seen held as a full mutex (the caller
 * must then take the contended path).
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
			*new_state = state;
			return TRUE;
		}
		if (state & LCK_MTX_MLOCKED_MSK) {
			/* if it is held as mutex, just fail */
			return FALSE;
		}
	}
}
1522 
/*
 * try-lock flavor of the interlock wait: fail immediately if the lock
 * is held either as a mutex or as a spinlock; otherwise spin only on
 * the interlock and return TRUE with *new_state updated once clear.
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
			/* if it is held as mutex or spin, just fail */
			return FALSE;
		}
		if (!(state & LCK_MTX_ILOCKED_MSK)) {
			*new_state = state;
			return TRUE;
		}
	}
}
1544 
1545 /*
1546  * Routine:	lck_mtx_lock_slow
1547  *
1548  * Locks a mutex for current thread.
1549  * If the lock is contended this function might
1550  * sleep.
1551  *
1552  * Called with interlock not held.
1553  */
1554 __attribute__((noinline))
1555 void
lck_mtx_lock_slow(lck_mtx_t * lock)1556 lck_mtx_lock_slow(
1557 	lck_mtx_t       *lock)
1558 {
1559 	bool            profile;
1560 	uint32_t        state;
1561 	int             first_miss = 0;
1562 
1563 	state = ordered_load_mtx_state(lock);
1564 	profile = (state & LCK_MTX_PROFILE_MSK);
1565 
1566 	if (__improbable(profile)) {
1567 		if (state & LCK_MTX_SPIN_MSK) {
1568 			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1569 			assert(state & LCK_MTX_ILOCKED_MSK);
1570 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1571 		}
1572 	}
1573 
1574 	/* is the interlock or mutex held */
1575 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1576 		/*
1577 		 * Note: LCK_MTX_TAG_DESTROYED has
1578 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1579 		 */
1580 
1581 		/* is the mutex already held */
1582 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1583 			/* no, must have been the mutex */
1584 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1585 		}
1586 
1587 		/* check to see if it is marked destroyed */
1588 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1589 			lck_mtx_destroyed(lock);
1590 		}
1591 	}
1592 
1593 	/* no - can't be DESTROYED or locked */
1594 	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1595 		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1596 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1597 		}
1598 	}
1599 
1600 	/* lock and interlock acquired */
1601 
1602 	/* record owner of mutex */
1603 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1604 
1605 #if MACH_LDEBUG
1606 	if (thread) {
1607 		thread->mutex_count++;          /* lock statistic */
1608 	}
1609 #endif
1610 	/*
1611 	 * Check if there are waiters to
1612 	 * inherit their priority.
1613 	 */
1614 	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1615 		return lck_mtx_lock_acquire_tail(lock, NULL);
1616 	}
1617 
1618 	/* release the interlock */
1619 	lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1620 }
1621 
1622 __attribute__((noinline))
1623 boolean_t
lck_mtx_try_lock_slow(lck_mtx_t * lock)1624 lck_mtx_try_lock_slow(
1625 	lck_mtx_t       *lock)
1626 {
1627 	bool            profile;
1628 	uint32_t        state;
1629 	int             first_miss = 0;
1630 
1631 	state = ordered_load_mtx_state(lock);
1632 	profile = (state & LCK_MTX_PROFILE_MSK);
1633 
1634 	/* is the interlock or mutex held */
1635 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1636 		/*
1637 		 * Note: LCK_MTX_TAG_DESTROYED has
1638 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1639 		 */
1640 
1641 		/* is the mutex already held */
1642 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1643 			return FALSE;
1644 		}
1645 
1646 		/* check to see if it is marked destroyed */
1647 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1648 			lck_mtx_try_destroyed(lock);
1649 		}
1650 
1651 		if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
1652 			if (__improbable(profile)) {
1653 				LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1654 			}
1655 			return FALSE;
1656 		}
1657 	}
1658 
1659 	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
1660 		if (__improbable(profile)) {
1661 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1662 		}
1663 		return FALSE;
1664 	}
1665 
1666 	/* lock and interlock acquired */
1667 
1668 	/* record owner of mutex */
1669 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1670 
1671 #if MACH_LDEBUG
1672 	if (thread) {
1673 		thread->mutex_count++;          /* lock statistic */
1674 	}
1675 #endif
1676 	/*
1677 	 * Check if there are waiters to
1678 	 * inherit their priority.
1679 	 */
1680 	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
1681 		return lck_mtx_try_lock_acquire_tail(lock);
1682 	}
1683 
1684 	/* release the interlock */
1685 	lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
1686 
1687 	return TRUE;
1688 }
1689 
1690 __attribute__((noinline))
1691 void
lck_mtx_lock_spin_slow(lck_mtx_t * lock)1692 lck_mtx_lock_spin_slow(
1693 	lck_mtx_t       *lock)
1694 {
1695 	bool            profile;
1696 	uint32_t        state;
1697 	int             first_miss = 0;
1698 
1699 	state = ordered_load_mtx_state(lock);
1700 	profile = (state & LCK_MTX_PROFILE_MSK);
1701 
1702 	if (__improbable(profile)) {
1703 		if (state & LCK_MTX_SPIN_MSK) {
1704 			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
1705 			assert(state & LCK_MTX_ILOCKED_MSK);
1706 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1707 		}
1708 	}
1709 
1710 	/* is interlock or mutex held */
1711 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1712 		/*
1713 		 * Note: LCK_MTX_TAG_DESTROYED has
1714 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1715 		 */
1716 
1717 		/* is the mutex already held */
1718 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1719 			/* no, must have been the mutex */
1720 			return lck_mtx_lock_contended(lock, profile, &first_miss);
1721 		}
1722 
1723 		/* check to see if it is marked destroyed */
1724 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1725 			lck_mtx_destroyed(lock);
1726 		}
1727 	}
1728 
1729 	bool acquired = true;
1730 
1731 #if CONFIG_DTRACE
1732 	uint64_t spin_start;
1733 
1734 	if (__probable(lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1735 		goto past_spin;
1736 	}
1737 
1738 	spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
1739 #endif /* CONFIG_DTRACE */
1740 
1741 	/* note - can't be DESTROYED or locked */
1742 	/* note - preemption is not disabled while spinning */
1743 	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1744 		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
1745 			acquired = false;
1746 			break;
1747 		}
1748 	}
1749 
1750 	LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
1751 #if CONFIG_DTRACE
1752 past_spin:
1753 #endif /* CONFIG_DTRACE */
1754 
1755 	if (__improbable(!acquired)) {
1756 		return lck_mtx_lock_contended(lock, profile, &first_miss);
1757 	}
1758 
1759 	/* lock as spinlock and interlock acquired */
1760 
1761 	/* record owner of mutex */
1762 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1763 
1764 #if MACH_LDEBUG
1765 	if (thread) {
1766 		thread->mutex_count++;          /* lock statistic */
1767 	}
1768 #endif
1769 	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp, true, profile);
1770 
1771 	/* return with the interlock held and preemption disabled */
1772 	return;
1773 }
1774 
1775 __attribute__((noinline))
1776 boolean_t
lck_mtx_try_lock_spin_slow(lck_mtx_t * lock)1777 lck_mtx_try_lock_spin_slow(
1778 	lck_mtx_t       *lock)
1779 {
1780 	bool            profile;
1781 	uint32_t        state;
1782 	int             first_miss = 0;
1783 
1784 	state = ordered_load_mtx_state(lock);
1785 	profile = (state & LCK_MTX_PROFILE_MSK);
1786 
1787 	/* is the interlock or mutex held */
1788 	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1789 		/*
1790 		 * Note: LCK_MTX_TAG_DESTROYED has
1791 		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
1792 		 */
1793 
1794 		/* is the mutex already held */
1795 		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
1796 			return FALSE;
1797 		}
1798 
1799 		/* check to see if it is marked destroyed */
1800 		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
1801 			lck_mtx_try_destroyed(lock);
1802 		}
1803 	}
1804 
1805 	/* note - can't be DESTROYED or locked */
1806 	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
1807 		if (__improbable(profile)) {
1808 			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
1809 		}
1810 		return FALSE;
1811 	}
1812 
1813 	/* lock and interlock acquired */
1814 
1815 	/* record owner of mutex */
1816 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1817 
1818 #if MACH_LDEBUG
1819 	if (thread) {
1820 		thread->mutex_count++;          /* lock statistic */
1821 	}
1822 #endif
1823 
1824 #if     CONFIG_DTRACE
1825 	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
1826 #endif
1827 	return TRUE;
1828 }
1829 
/*
 * Convert a spin-mode hold (interlock + spin flag, preemption disabled)
 * into a full mutex hold.  Must be called by the owner; a no-op if the
 * lock is already held as a mutex.
 */
__attribute__((noinline))
void
lck_mtx_convert_spin(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	assertf(lock->lck_mtx_owner == current_thread()->ctid,
	    "lock %p not owned by thread ctid %d (current owner %d)",
	    lock, current_thread()->ctid, lock->lck_mtx_owner);

	if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
		/* already owned as a mutex, just return */
		return;
	}

	/* spin-mode hold implies interlock held and preemption off */
	assert(get_preemption_level() > 0);
	assert(state & LCK_MTX_ILOCKED_MSK);
	assert(state & LCK_MTX_SPIN_MSK);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_convert_spin_acquire_tail(lock);
	}

	lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));

	return;
}
1864 
1865 static inline boolean_t
lck_mtx_lock_grab_mutex(lck_mtx_t * lock)1866 lck_mtx_lock_grab_mutex(
1867 	lck_mtx_t       *lock)
1868 {
1869 	uint32_t state;
1870 
1871 	state = ordered_load_mtx_state(lock);
1872 
1873 	if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
1874 		return FALSE;
1875 	}
1876 
1877 	/* lock and interlock acquired */
1878 
1879 	/* record owner of mutex */
1880 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1881 
1882 #if MACH_LDEBUG
1883 	if (thread) {
1884 		thread->mutex_count++;          /* lock statistic */
1885 	}
1886 #endif
1887 	return TRUE;
1888 }
1889 
/*
 * Assert ownership (LCK_MTX_ASSERT_OWNED) or non-ownership
 * (LCK_MTX_ASSERT_NOTOWNED) of the mutex by the current thread,
 * panicking if the assertion fails.  Ownership requires both a
 * matching owner ctid and the lock held as mutex or spin.
 */
__attribute__((noinline))
void
lck_mtx_assert(
	lck_mtx_t       *lock,
	unsigned int    type)
{
	thread_t thread;
	uint32_t state;

	thread = current_thread();
	state = ordered_load_mtx_state(lock);

	if (type == LCK_MTX_ASSERT_OWNED) {
		if ((lock->lck_mtx_owner != thread->ctid) ||
		    !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
			panic("mutex (%p) not owned", lock);
		}
	} else {
		/* only two assertion types exist */
		assert(type == LCK_MTX_ASSERT_NOTOWNED);
		if (lock->lck_mtx_owner == thread->ctid) {
			panic("mutex (%p) owned", lock);
		}
	}
}
1914 
1915 /*
1916  * Routine:     lck_mtx_lock_spinwait_x86
1917  *
1918  * Invoked trying to acquire a mutex when there is contention but
1919  * the holder is running on another processor. We spin for up to a maximum
1920  * time waiting for the lock to be released.
1921  *
1922  * Called with the interlock unlocked.
1923  * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
1924  * returns LCK_MTX_SPINWAIT_SPUN if we spun
1925  * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
1926  */
1927 __attribute__((noinline))
1928 lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(lck_mtx_t * mutex)1929 lck_mtx_lock_spinwait_x86(
1930 	lck_mtx_t       *mutex)
1931 {
1932 	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1933 	ctid_t          owner, prev_owner;
1934 	uint64_t        window_deadline, sliding_deadline, high_deadline;
1935 	uint64_t        spin_start, start_time, cur_time, avg_hold_time, bias, delta;
1936 	lck_mtx_spinwait_ret_type_t             retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
1937 	int             loopcount = 0;
1938 	int             total_hold_time_samples, window_hold_time_samples, unfairness;
1939 	uint            prev_owner_cpu;
1940 	bool            adjust;
1941 
1942 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1943 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
1944 	    mutex->lck_mtx_waiters, 0, 0);
1945 
1946 	spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
1947 	start_time = mach_absolute_time();
1948 
1949 	/*
1950 	 * window_deadline represents the "learning" phase.
1951 	 * The thread collects statistics about the lock during
1952 	 * window_deadline and then it makes a decision on whether to spin more
1953 	 * or block according to the concurrency behavior
1954 	 * observed.
1955 	 *
1956 	 * Every thread can spin at least low_MutexSpin.
1957 	 */
1958 	window_deadline = start_time + low_MutexSpin;
1959 	/*
1960 	 * Sliding_deadline is the adjusted spin deadline
1961 	 * computed after the "learning" phase.
1962 	 */
1963 	sliding_deadline = window_deadline;
1964 	/*
1965 	 * High_deadline is a hard deadline. No thread
1966 	 * can spin more than this deadline.
1967 	 */
1968 	if (high_MutexSpin >= 0) {
1969 		high_deadline = start_time + high_MutexSpin;
1970 	} else {
1971 		high_deadline = start_time + low_MutexSpin * real_ncpus;
1972 	}
1973 
1974 	/*
1975 	 * Do not know yet which is the owner cpu.
1976 	 * Initialize prev_owner_cpu with next cpu.
1977 	 */
1978 	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1979 	total_hold_time_samples = 0;
1980 	window_hold_time_samples = 0;
1981 	avg_hold_time = 0;
1982 	adjust = TRUE;
1983 	bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
1984 
1985 	prev_owner = mutex->lck_mtx_owner;
1986 
1987 	/*
1988 	 * Spin while:
1989 	 *   - mutex is locked, and
1990 	 *   - it's locked as a spin lock, and
1991 	 *   - owner is running on another processor, and
1992 	 *   - we haven't spun for long enough.
1993 	 */
1994 	do {
1995 		/*
1996 		 * Try to acquire the lock.
1997 		 */
1998 		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
1999 			retval = LCK_MTX_SPINWAIT_ACQUIRED;
2000 			break;
2001 		}
2002 
2003 		cur_time = mach_absolute_time();
2004 
2005 		/*
2006 		 * Never spin past high_deadline.
2007 		 */
2008 		if (cur_time >= high_deadline) {
2009 			retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
2010 			break;
2011 		}
2012 
2013 		/*
2014 		 * Check if owner is on core. If not block.
2015 		 */
2016 		owner = mutex->lck_mtx_owner;
2017 		if (owner) {
2018 			uint32_t i = prev_owner_cpu;
2019 			bool     owner_on_core = false;
2020 
2021 			disable_preemption();
2022 			owner = mutex->lck_mtx_owner;
2023 
2024 			/*
2025 			 * For scalability we want to check if the owner is on core
2026 			 * without locking the mutex interlock.
2027 			 * If we do not lock the mutex interlock, the owner that we see might be
2028 			 * invalid, so we cannot dereference it. Therefore we cannot check
2029 			 * any field of the thread to tell us if it is on core.
2030 			 * Check if the thread that is running on the other cpus matches the owner.
2031 			 */
2032 			if (owner) {
2033 				thread_t owner_thread = ctid_get_thread_unsafe(owner);
2034 
2035 				do {
2036 					if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner_thread)) {
2037 						owner_on_core = true;
2038 						break;
2039 					}
2040 					if (++i >= real_ncpus) {
2041 						i = 0;
2042 					}
2043 				} while (i != prev_owner_cpu);
2044 			}
2045 
2046 			enable_preemption();
2047 
2048 			if (owner == 0) {
2049 				/* nothing to do, we didn't find an owner */
2050 			} else if (owner_on_core) {
2051 				prev_owner_cpu = i;
2052 			} else {
2053 				prev_owner = owner;
2054 				owner = mutex->lck_mtx_owner;
2055 				if (owner == prev_owner) {
2056 					/*
2057 					 * Owner is not on core.
2058 					 * Stop spinning.
2059 					 */
2060 					if (loopcount == 0) {
2061 						retval = LCK_MTX_SPINWAIT_NO_SPIN;
2062 					} else {
2063 						retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
2064 					}
2065 					break;
2066 				}
2067 				/*
2068 				 * Fall through if the owner changed while we were scanning.
2069 				 * The new owner could potentially be on core, so loop
2070 				 * again.
2071 				 */
2072 			}
2073 		}
2074 
2075 		/*
2076 		 * Save how many times we see the owner changing.
2077 		 * We can roughly estimate the mutex hold
2078 		 * time and the fairness with that.
2079 		 */
2080 		if (owner != prev_owner) {
2081 			prev_owner = owner;
2082 			total_hold_time_samples++;
2083 			window_hold_time_samples++;
2084 		}
2085 
2086 		/*
2087 		 * Learning window expired.
2088 		 * Try to adjust the sliding_deadline.
2089 		 */
2090 		if (cur_time >= window_deadline) {
2091 			/*
2092 			 * If there was not contention during the window
2093 			 * stop spinning.
2094 			 */
2095 			if (window_hold_time_samples < 1) {
2096 				retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
2097 				break;
2098 			}
2099 
2100 			if (adjust) {
2101 				/*
2102 				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2103 				 * but the lock is unfair, so let's try to estimate by how much.
2104 				 */
2105 				unfairness = total_hold_time_samples / real_ncpus;
2106 
2107 				if (unfairness == 0) {
2108 					/*
2109 					 * We observed the owner changing `total_hold_time_samples` times which
2110 					 * let us estimate the average hold time of this mutex for the duration
2111 					 * of the spin time.
2112 					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2113 					 *
2114 					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2115 					 */
2116 					delta = cur_time - start_time;
2117 					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2118 				} else {
2119 					/*
2120 					 * In this case at least one of the other cpus was able to get the lock twice
2121 					 * while I was spinning.
2122 					 * We could spin longer but it won't necessarily help if the system is unfair.
2123 					 * Try to randomize the wait to reduce contention.
2124 					 *
2125 					 * We compute how much time we could potentially spin
2126 					 * and distribute it over the cpus.
2127 					 *
2128 					 * bias is an integer between 0 and real_ncpus.
2129 					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2130 					 */
2131 					delta = high_deadline - cur_time;
2132 					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2133 					adjust = FALSE;
2134 				}
2135 			}
2136 
2137 			window_deadline += low_MutexSpin;
2138 			window_hold_time_samples = 0;
2139 		}
2140 
		/*
		 * Stop spinning if we are past
		 * the adjusted deadline.
		 */
2145 		if (cur_time >= sliding_deadline) {
2146 			retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
2147 			break;
2148 		}
2149 
2150 		if (mutex->lck_mtx_owner != 0) {
2151 			cpu_pause();
2152 		}
2153 
2154 		loopcount++;
2155 	} while (TRUE);
2156 
2157 	LCK_MTX_ADAPTIVE_SPIN_END(mutex, mutex->lck_mtx_grp, spin_start);
2158 
2159 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2160 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
2161 	    mutex->lck_mtx_waiters, retval, 0);
2162 
2163 	return retval;
2164 }
2165 
2166 
2167 
2168 /*
2169  * Routine:     lck_mtx_lock_wait_x86
2170  *
2171  * Invoked in order to wait on contention.
2172  *
2173  * Called with the interlock locked and
2174  * preemption disabled...
2175  * returns it unlocked and with preemption enabled
2176  *
2177  * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
2178  *      A runnable waiter can exist between wait and acquire
2179  *      without a waiters count being set.
2180  *      This allows us to never make a spurious wakeup call.
2181  *
2182  * Priority:
2183  *      This avoids taking the thread lock if the owning thread is the same priority.
2184  *      This optimizes the case of same-priority threads contending on a lock.
2185  *      However, that allows the owning thread to drop in priority while holding the lock,
2186  *      because there is no state that the priority change can notice that
2187  *      says that the targeted thread holds a contended mutex.
2188  *
2189  *      One possible solution: priority changes could look for some atomic tag
2190  *      on the thread saying 'holding contended lock', and then set up a promotion.
2191  *      Needs a story for dropping that promotion - the last contended unlock
2192  *      has to notice that this has happened.
2193  */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
	lck_mtx_t       *mutex,
	struct turnstile **ts)
{
	thread_t self = current_thread();
	uint64_t sleep_start;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	/*
	 * Snapshot the current owner; the interlock is held on entry
	 * (see the routine header above), so the owner cannot change
	 * underneath us here.
	 */
	thread_t holder = ctid_get_thread(mutex->lck_mtx_owner);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder),
	    mutex->lck_mtx_waiters, 0, 0);
	sleep_start = LCK_MTX_BLOCK_BEGIN();

	/*
	 * Advertise that a wakeup will be needed at unlock time:
	 * lck_mtx_waiters is 1:1 with a wakeup needing to occur
	 * (see the routine header above).
	 */
	mutex->lck_mtx_waiters++;

	/*
	 * lck_mtx_lock_wait_x86 might be called in a loop. Call prepare just once and reuse
	 * the same turnstile while looping; the matching turnstile complete will be called
	 * by lck_mtx_lock_contended when finally acquiring the lock.
	 */
	if (*ts == NULL) {
		*ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	/*
	 * Make the owner the turnstile inheritor so it is promoted while we wait;
	 * the update is delayed and finalized below once the interlock is dropped.
	 */
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	/* Queue ourselves on the turnstile's waitq before releasing the interlock. */
	waitq_assert_wait64(&turnstile->ts_waitq, LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	/* Returns with the interlock unlocked and preemption re-enabled. */
	lck_mtx_ilk_unlock(mutex);

	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
	    mutex->lck_mtx_waiters, 0, 0);

	LCK_MTX_BLOCK_END(mutex, mutex->lck_mtx_grp, sleep_start);
}
2241 
2242 /*
2243  *      Routine: kdp_lck_mtx_lock_spin_is_acquired
2244  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2245  *      Returns: TRUE if lock is acquired.
2246  */
2247 boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t * lck)2248 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t     *lck)
2249 {
2250 	if (not_in_kdp) {
2251 		panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2252 	}
2253 
2254 	if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2255 		return TRUE;
2256 	}
2257 
2258 	return FALSE;
2259 }
2260 
2261 void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)2262 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2263 {
2264 	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2265 	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2266 	waitinfo->owner   = thread_tid(ctid_get_thread(mutex->lck_mtx_owner));
2267 }
2268 
2269 #endif /* LCK_MTX_USE_ARCH */
2270 #if CONFIG_PV_TICKET
2271 __startup_func
2272 void
lck_init_pv(void)2273 lck_init_pv(void)
2274 {
2275 	uint32_t pvtck = 1;
2276 	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
2277 	if (pvtck == 0) {
2278 		return;
2279 	}
2280 	has_lock_pv = cpuid_vmm_present() &&
2281 	    (cpuid_vmm_get_kvm_features() & CPUID_KVM_FEATURE_PV_UNHALT) != 0;
2282 }
2283 STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
2284 #endif
2285