xref: /xnu-11417.101.15/osfmk/i386/locks_i386.c (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  *	File:	kern/lock.c
58  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
59  *	Date:	1985
60  *
61  *	Locking primitives implementation
62  */
63 
64 #define LOCK_PRIVATE 1
65 
66 #include <mach_ldebug.h>
67 
68 #include <kern/lock_stat.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/cpu_data.h>
75 #include <kern/cpu_number.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <string.h>
79 
80 #include <i386/machine_routines.h> /* machine_timeout_suspended() */
81 #include <i386/tsc.h>
82 #include <machine/atomic.h>
83 #include <machine/machine_cpu.h>
84 #include <i386/mp.h>
85 #include <machine/atomic.h>
86 #include <sys/kdebug.h>
87 #if LCK_MTX_USE_ARCH
88 #include <i386/locks_i386_inlines.h>
89 #endif /* LCK_MTX_USE_ARCH */
90 #include <kern/cpu_number.h>
91 #include <os/hash.h>
92 #include <i386/cpuid.h>
93 
94 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
95 
/* Spin-panic timeout budget, in TSC ticks (tunable; default 0xf000000 ~= 251e6). */
uint64_t _Atomic lock_panic_timeout = 0xf000000;  /* 251e6 TSC ticks */
97 
98 /* Forwards */
99 
100 #if     USLOCK_DEBUG
101 /*
102  *	Perform simple lock checks.
103  */
104 int     uslock_check = 1;
105 int     max_lock_loops  = 100000000;
106 decl_simple_lock_data(extern, printf_lock);
107 decl_simple_lock_data(extern, panic_lock);
108 #endif  /* USLOCK_DEBUG */
109 
110 extern unsigned int not_in_kdp;
111 
112 #if !LCK_GRP_USE_ARG
113 #define usimple_lock_nopreempt(lck, grp) \
114 	usimple_lock_nopreempt(lck)
115 #define usimple_lock_try_nopreempt(lck, grp) \
116 	usimple_lock_try_nopreempt(lck)
117 #endif
118 static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
119 static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);
120 
/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
typedef void    *pc_t;
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
#if     ANY_LOCK_DEBUG
#define OBTAIN_PC(pc)   ((pc) = GET_RETURN_PC())
#define DECL_PC(pc)     pc_t pc;
#else   /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
#ifdef  lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc)   ++pc
#else   /* lint */
#define OBTAIN_PC(pc)
#endif  /* lint */
#endif  /* ANY_LOCK_DEBUG */
143 
/* Typed zone allocator for lck_spin_t (private accounting). */
KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

#if LCK_MTX_USE_ARCH
/* Typed zone allocator for lck_mtx_t, only when the arch-specific mutex is built. */
KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
#endif /* LCK_MTX_USE_ARCH */
149 
150 /*
151  * atomic exchange API is a low level abstraction of the operations
152  * to atomically read, modify, and write a pointer.  This abstraction works
153  * for both Intel and ARMv8.1 compare and exchange atomic instructions as
154  * well as the ARM exclusive instructions.
155  *
156  * atomic_exchange_begin() - begin exchange and retrieve current value
157  * atomic_exchange_complete() - conclude an exchange
158  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
159  */
160 uint32_t
atomic_exchange_begin32(uint32_t * target,uint32_t * previous,enum memory_order ord)161 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
162 {
163 	uint32_t        val;
164 
165 	(void)ord;                      // Memory order not used
166 	val = os_atomic_load(target, relaxed);
167 	*previous = val;
168 	return val;
169 }
170 
171 boolean_t
atomic_exchange_complete32(uint32_t * target,uint32_t previous,uint32_t newval,enum memory_order ord)172 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
173 {
174 	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
175 }
176 
/*
 * Cancel an exchange begun with atomic_exchange_begin32().
 * A no-op on x86: begin only performed a load, so there is no
 * exclusive reservation to release (unlike ARM LL/SC).
 */
void
atomic_exchange_abort(void)
{
}
181 
182 boolean_t
atomic_test_and_set32(uint32_t * target,uint32_t test_mask,uint32_t set_mask,enum memory_order ord,boolean_t wait)183 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
184 {
185 	uint32_t        value, prev;
186 
187 	for (;;) {
188 		value = atomic_exchange_begin32(target, &prev, ord);
189 		if (value & test_mask) {
190 			if (wait) {
191 				cpu_pause();
192 			} else {
193 				atomic_exchange_abort();
194 			}
195 			return FALSE;
196 		}
197 		value |= set_mask;
198 		if (atomic_exchange_complete32(target, prev, value, ord)) {
199 			return TRUE;
200 		}
201 	}
202 }
203 
204 /*
205  *	Portable lock package implementation of usimple_locks.
206  */
207 
208 #if     USLOCK_DEBUG
209 #define USLDBG(stmt)    stmt
210 void            usld_lock_init(usimple_lock_t, unsigned short);
211 void            usld_lock_pre(usimple_lock_t, pc_t);
212 void            usld_lock_post(usimple_lock_t, pc_t);
213 void            usld_unlock(usimple_lock_t, pc_t);
214 void            usld_lock_try_pre(usimple_lock_t, pc_t);
215 void            usld_lock_try_post(usimple_lock_t, pc_t);
216 int             usld_lock_common_checks(usimple_lock_t, char *);
217 #else   /* USLOCK_DEBUG */
218 #define USLDBG(stmt)
219 #endif  /* USLOCK_DEBUG */
220 
221 #if LCK_MTX_USE_ARCH
222 /*
223  * Forward definitions
224  */
225 
226 static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state);
227 static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
228 static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
229 static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
230 static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
231 static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
232 #endif /* LCK_MTX_USE_ARCH */
233 
234 
235 /*
236  *      Routine:        lck_spin_alloc_init
237  */
238 lck_spin_t *
lck_spin_alloc_init(lck_grp_t * grp,lck_attr_t * attr)239 lck_spin_alloc_init(
240 	lck_grp_t       *grp,
241 	lck_attr_t      *attr)
242 {
243 	lck_spin_t *lck;
244 
245 	lck = zalloc(KT_LCK_SPIN);
246 	lck_spin_init(lck, grp, attr);
247 	return lck;
248 }
249 
250 /*
251  *      Routine:        lck_spin_free
252  */
253 void
lck_spin_free(lck_spin_t * lck,lck_grp_t * grp)254 lck_spin_free(
255 	lck_spin_t      *lck,
256 	lck_grp_t       *grp)
257 {
258 	lck_spin_destroy(lck, grp);
259 	zfree(KT_LCK_SPIN, lck);
260 }
261 
262 /*
263  *      Routine:        lck_spin_init
264  */
265 void
lck_spin_init(lck_spin_t * lck,lck_grp_t * grp,__unused lck_attr_t * attr)266 lck_spin_init(
267 	lck_spin_t      *lck,
268 	lck_grp_t       *grp,
269 	__unused lck_attr_t     *attr)
270 {
271 	usimple_lock_init((usimple_lock_t) lck, 0);
272 	if (grp) {
273 		lck_grp_reference(grp, &grp->lck_grp_spincnt);
274 	}
275 }
276 
277 /*
278  *      Routine:        lck_spin_destroy
279  */
280 void
lck_spin_destroy(lck_spin_t * lck,lck_grp_t * grp)281 lck_spin_destroy(
282 	lck_spin_t      *lck,
283 	lck_grp_t       *grp)
284 {
285 	if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
286 		return;
287 	}
288 	lck->interlock = LCK_SPIN_TAG_DESTROYED;
289 	if (grp) {
290 		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
291 	}
292 	return;
293 }
294 
295 /*
296  *      Routine:        lck_spin_lock
297  */
298 void
lck_spin_lock_grp(lck_spin_t * lck,lck_grp_t * grp)299 lck_spin_lock_grp(
300 	lck_spin_t      *lck,
301 	lck_grp_t       *grp)
302 {
303 #pragma unused(grp)
304 	usimple_lock((usimple_lock_t) lck, grp);
305 }
306 
307 void
lck_spin_lock(lck_spin_t * lck)308 lck_spin_lock(
309 	lck_spin_t      *lck)
310 {
311 	usimple_lock((usimple_lock_t) lck, NULL);
312 }
313 
314 void
lck_spin_lock_nopreempt(lck_spin_t * lck)315 lck_spin_lock_nopreempt(
316 	lck_spin_t      *lck)
317 {
318 	usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
319 }
320 
321 void
lck_spin_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)322 lck_spin_lock_nopreempt_grp(
323 	lck_spin_t      *lck,
324 	lck_grp_t       *grp)
325 {
326 #pragma unused(grp)
327 	usimple_lock_nopreempt((usimple_lock_t) lck, grp);
328 }
329 
330 /*
331  *      Routine:        lck_spin_unlock
332  */
333 void
lck_spin_unlock(lck_spin_t * lck)334 lck_spin_unlock(
335 	lck_spin_t      *lck)
336 {
337 	usimple_unlock((usimple_lock_t) lck);
338 }
339 
340 void
lck_spin_unlock_nopreempt(lck_spin_t * lck)341 lck_spin_unlock_nopreempt(
342 	lck_spin_t      *lck)
343 {
344 	usimple_unlock_nopreempt((usimple_lock_t) lck);
345 }
346 
347 boolean_t
lck_spin_try_lock_grp(lck_spin_t * lck,lck_grp_t * grp)348 lck_spin_try_lock_grp(
349 	lck_spin_t      *lck,
350 	lck_grp_t       *grp)
351 {
352 #pragma unused(grp)
353 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
354 #if     DEVELOPMENT || DEBUG
355 	if (lrval) {
356 		pltrace(FALSE);
357 	}
358 #endif
359 	return lrval;
360 }
361 
362 
363 /*
364  *      Routine:        lck_spin_try_lock
365  */
366 boolean_t
lck_spin_try_lock(lck_spin_t * lck)367 lck_spin_try_lock(
368 	lck_spin_t      *lck)
369 {
370 	boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
371 #if     DEVELOPMENT || DEBUG
372 	if (lrval) {
373 		pltrace(FALSE);
374 	}
375 #endif
376 	return lrval;
377 }
378 
379 int
lck_spin_try_lock_nopreempt(lck_spin_t * lck)380 lck_spin_try_lock_nopreempt(
381 	lck_spin_t      *lck)
382 {
383 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
384 #if     DEVELOPMENT || DEBUG
385 	if (lrval) {
386 		pltrace(FALSE);
387 	}
388 #endif
389 	return lrval;
390 }
391 
392 int
lck_spin_try_lock_nopreempt_grp(lck_spin_t * lck,lck_grp_t * grp)393 lck_spin_try_lock_nopreempt_grp(
394 	lck_spin_t      *lck,
395 	lck_grp_t       *grp)
396 {
397 #pragma unused(grp)
398 	boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
399 #if     DEVELOPMENT || DEBUG
400 	if (lrval) {
401 		pltrace(FALSE);
402 	}
403 #endif
404 	return lrval;
405 }
406 
407 /*
408  *	Routine:	lck_spin_assert
409  */
410 void
lck_spin_assert(const lck_spin_t * lock,unsigned int type)411 lck_spin_assert(const lck_spin_t *lock, unsigned int type)
412 {
413 	thread_t thread, holder;
414 	uintptr_t state;
415 
416 	if (__improbable(type != LCK_ASSERT_OWNED && type != LCK_ASSERT_NOTOWNED)) {
417 		panic("lck_spin_assert(): invalid arg (%u)", type);
418 	}
419 
420 	state = lock->interlock;
421 	holder = (thread_t)state;
422 	thread = current_thread();
423 	if (type == LCK_ASSERT_OWNED) {
424 		if (__improbable(holder == THREAD_NULL)) {
425 			panic("Lock not owned %p = %lx", lock, state);
426 		}
427 		if (__improbable(holder != thread)) {
428 			panic("Lock not owned by current thread %p = %lx", lock, state);
429 		}
430 	} else if (type == LCK_ASSERT_NOTOWNED) {
431 		if (__improbable(holder != THREAD_NULL)) {
432 			if (holder == thread) {
433 				panic("Lock owned by current thread %p = %lx", lock, state);
434 			}
435 		}
436 	}
437 }
438 
439 /*
440  *      Routine: kdp_lck_spin_is_acquired
441  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
442  *      Returns: TRUE if lock is acquired.
443  */
444 boolean_t
kdp_lck_spin_is_acquired(lck_spin_t * lck)445 kdp_lck_spin_is_acquired(lck_spin_t *lck)
446 {
447 	if (not_in_kdp) {
448 		panic("panic: spinlock acquired check done outside of kernel debugger");
449 	}
450 	return (lck->interlock != 0)? TRUE : FALSE;
451 }
452 
453 /*
454  *	Initialize a usimple_lock.
455  *
456  *	No change in preemption state.
457  */
458 void
usimple_lock_init(usimple_lock_t l,__unused unsigned short tag)459 usimple_lock_init(
460 	usimple_lock_t  l,
461 	__unused unsigned short tag)
462 {
463 	USLDBG(usld_lock_init(l, tag));
464 	hw_lock_init(&l->interlock);
465 }
466 
467 static hw_spin_timeout_status_t
usimple_lock_acquire_timeout_panic(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)468 usimple_lock_acquire_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
469 {
470 	usimple_lock_t l = _lock;
471 	uintptr_t lowner;
472 	lck_spinlock_to_info_t lsti;
473 
474 	if (machine_timeout_suspended()) {
475 		return HW_LOCK_TIMEOUT_CONTINUE;
476 	}
477 
478 	lowner = (uintptr_t)l->interlock.lock_data;
479 	lsti = lck_spinlock_timeout_hit(l, lowner);
480 
481 	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT " ;"
482 	    "lock owner thread=0x%lx, current_thread: %p, "
483 	    "lock owner active on CPU %d, "
484 	    HW_SPIN_TIMEOUT_DETAILS_FMT,
485 	    l, HW_SPIN_TIMEOUT_ARG(to, st),
486 	    lowner, current_thread(), lsti->owner_cpu,
487 	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
488 }
489 
/*
 * Spin policy shared by all usimple_locks: spin up to LockTimeOutTSC
 * TSC ticks, then invoke usimple_lock_acquire_timeout_panic().
 */
static const struct hw_spin_policy usimple_lock_spin_policy = {
	.hwsp_name              = "usimple_lock_t",
	.hwsp_timeout           = &LockTimeOutTSC,
	.hwsp_op_timeout        = usimple_lock_acquire_timeout_panic,
};
495 
496 /*
497  *	Acquire a usimple_lock.
498  *
499  *	Returns with preemption disabled.  Note
500  *	that the hw_lock routines are responsible for
501  *	maintaining preemption state.
502  */
void
(usimple_lock)(         /* name parenthesized so a same-named function-like macro is not expanded here */
	usimple_lock_t  l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	DECL_PC(pc);

	OBTAIN_PC(pc);                  /* record caller PC when lock debugging is built */
	USLDBG(usld_lock_pre(l, pc));   /* sanity checks before spinning */

	/* spin with timeout policy; disables preemption on acquire */
	(void)hw_lock_to(&l->interlock, &usimple_lock_spin_policy, grp);
#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));  /* record the new owner */
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
523 
524 /*
525  *	Acquire a usimple_lock_nopreempt
526  *
527  *	Called and returns with preemption disabled.  Note
528  *	that the hw_lock routines are responsible for
529  *	maintaining preemption state.
530  */
/* NOTE: when !LCK_GRP_USE_ARG, the macro at the top of this file rewrites
 * this two-argument definition into a one-argument one; keep the shape. */
static void
usimple_lock_nopreempt(
	usimple_lock_t  l,
	lck_grp_t *grp)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);                  /* record caller PC when lock debugging is built */
	USLDBG(usld_lock_pre(l, pc));   /* sanity checks before spinning */

	/* spin with timeout policy; caller already disabled preemption */
	(void)hw_lock_to_nopreempt(&l->interlock, &usimple_lock_spin_policy, grp);

#if DEVELOPMENT || DEBUG
	pltrace(FALSE);
#endif

	USLDBG(usld_lock_post(l, pc));  /* record the new owner */
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
}
552 
553 
554 /*
555  *	Release a usimple_lock.
556  *
557  *	Returns with preemption enabled.  Note
558  *	that the hw_lock routines are responsible for
559  *	maintaining preemption state.
560  */
void
usimple_unlock(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));     /* ownership checks before releasing */
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	hw_lock_unlock(&l->interlock);  /* releases the lock and re-enables preemption */
}
574 
575 /*
576  *	Release a usimple_unlock_nopreempt.
577  *
578  *	Called and returns with preemption enabled.  Note
579  *	that the hw_lock routines are responsible for
580  *	maintaining preemption state.
581  */
void
usimple_unlock_nopreempt(
	usimple_lock_t  l)
{
	DECL_PC(pc);

	OBTAIN_PC(pc);
	USLDBG(usld_unlock(l, pc));     /* ownership checks before releasing */
#if DEVELOPMENT || DEBUG
	pltrace(TRUE);
#endif
	/* releases the lock without changing the preemption level */
	hw_lock_unlock_nopreempt(&l->interlock);
}
595 
596 /*
597  *	Conditionally acquire a usimple_lock.
598  *
599  *	On success, returns with preemption disabled.
600  *	On failure, returns with preemption in the same state
601  *	as when first invoked.  Note that the hw_lock routines
602  *	are responsible for maintaining preemption state.
603  *
604  *	XXX No stats are gathered on a miss; I preserved this
605  *	behavior from the original assembly-language code, but
606  *	doesn't it make sense to log misses?  XXX
607  */
608 unsigned int
usimple_lock_try(usimple_lock_t l,lck_grp_t * grp)609 usimple_lock_try(
610 	usimple_lock_t  l,
611 	lck_grp_t *grp)
612 {
613 	unsigned int    success;
614 	DECL_PC(pc);
615 
616 	OBTAIN_PC(pc);
617 	USLDBG(usld_lock_try_pre(l, pc));
618 	if ((success = hw_lock_try(&l->interlock, grp))) {
619 #if DEVELOPMENT || DEBUG
620 		pltrace(FALSE);
621 #endif
622 		USLDBG(usld_lock_try_post(l, pc));
623 	}
624 	return success;
625 }
626 
627 void
usimple_lock_assert(usimple_lock_t l,unsigned int type)628 usimple_lock_assert(usimple_lock_t l, unsigned int type)
629 {
630 	hw_lock_assert(&l->interlock, type);
631 }
632 
633 /*
634  *	Conditionally acquire a usimple_lock.
635  *
636  *	Called and returns with preemption disabled.  Note
637  *	that the hw_lock routines are responsible for
638  *	maintaining preemption state.
639  *
640  *	XXX No stats are gathered on a miss; I preserved this
641  *	behavior from the original assembly-language code, but
642  *	doesn't it make sense to log misses?  XXX
643  */
644 static unsigned int
usimple_lock_try_nopreempt(usimple_lock_t l,lck_grp_t * grp)645 usimple_lock_try_nopreempt(
646 	usimple_lock_t  l,
647 	lck_grp_t *grp)
648 {
649 	unsigned int    success;
650 	DECL_PC(pc);
651 
652 	OBTAIN_PC(pc);
653 	USLDBG(usld_lock_try_pre(l, pc));
654 	if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
655 #if DEVELOPMENT || DEBUG
656 		pltrace(FALSE);
657 #endif
658 		USLDBG(usld_lock_try_post(l, pc));
659 	}
660 	return success;
661 }
662 
663 /*
664  * Acquire a usimple_lock while polling for pending cpu signals
665  * and spinning on a lock.
666  *
667  */
668 unsigned
669 int
670 (usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
671     uint64_t deadline
672     LCK_GRP_ARG(lck_grp_t *grp))
673 {
674 	boolean_t istate = ml_get_interrupts_enabled();
675 
676 	if (deadline < mach_absolute_time()) {
677 		return 0;
678 	}
679 
680 	while (!simple_lock_try(l, grp)) {
681 		if (!istate) {
682 			cpu_signal_handler(NULL);
683 		}
684 
685 		if (deadline < mach_absolute_time()) {
686 			return 0;
687 		}
688 
689 		cpu_pause();
690 	}
691 
692 	return 1;
693 }
694 
695 void
696 (usimple_lock_try_lock_loop)(usimple_lock_t l
697     LCK_GRP_ARG(lck_grp_t *grp))
698 {
699 	/* When the lock is not contended, grab the lock and go. */
700 	if (!simple_lock_try(l, grp)) {
701 		usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
702 	}
703 }
704 
705 unsigned
706 int
707 (usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
708     uint64_t duration
709     LCK_GRP_ARG(lck_grp_t *grp))
710 {
711 	uint64_t deadline;
712 	uint64_t base_at;
713 	uint64_t duration_at;
714 
715 	/* Fast track for uncontended locks */
716 	if (simple_lock_try(l, grp)) {
717 		return 1;
718 	}
719 
720 	base_at = mach_absolute_time();
721 
722 	nanoseconds_to_absolutetime(duration, &duration_at);
723 	deadline = base_at + duration_at;
724 	if (deadline < base_at) {
725 		/* deadline has overflowed, make it saturate */
726 		deadline = ULLONG_MAX;
727 	}
728 
729 	return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
730 }
731 
732 #if     USLOCK_DEBUG
733 /*
734  *	States of a usimple_lock.  The default when initializing
735  *	a usimple_lock is setting it up for debug checking.
736  */
737 #define USLOCK_CHECKED          0x0001          /* lock is being checked */
738 #define USLOCK_TAKEN            0x0002          /* lock has been taken */
739 #define USLOCK_INIT             0xBAA0          /* lock has been initialized */
740 #define USLOCK_INITIALIZED      (USLOCK_INIT|USLOCK_CHECKED)
741 #define USLOCK_CHECKING(l)      (uslock_check &&                        \
742 	                         ((l)->debug.state & USLOCK_CHECKED))
743 
744 /*
745  *	Initialize the debugging information contained
746  *	in a usimple_lock.
747  */
748 void
usld_lock_init(usimple_lock_t l,__unused unsigned short tag)749 usld_lock_init(
750 	usimple_lock_t  l,
751 	__unused unsigned short tag)
752 {
753 	if (l == USIMPLE_LOCK_NULL) {
754 		panic("lock initialization:  null lock pointer");
755 	}
756 	l->lock_type = USLOCK_TAG;
757 	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
758 	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
759 	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
760 	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
761 	l->debug.duration[0] = l->debug.duration[1] = 0;
762 	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
763 	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
764 	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
765 }
766 
767 
/*
 *	These checks apply to all usimple_locks, not just
 *	those with USLOCK_CHECKED turned on.
 *	Panics on a null, non-uslock, or uninitialized lock;
 *	otherwise returns non-zero iff per-lock checking is enabled.
 */
int
usld_lock_common_checks(
	usimple_lock_t  l,
	char            *caller)
{
	if (l == USIMPLE_LOCK_NULL) {
		panic("%s:  null lock pointer", caller);
	}
	if (l->lock_type != USLOCK_TAG) {
		panic("%s:  %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
	}
	if (!(l->debug.state & USLOCK_INIT)) {
		panic("%s:  %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
	}
	return USLOCK_CHECKING(l);
}
788 
789 
/*
 *	Debug checks on a usimple_lock just before attempting
 *	to acquire it.  Panics on an attempted recursive acquire.
 */
/* ARGSUSED */
void
usld_lock_pre(
	usimple_lock_t  l,
	pc_t            pc)
{
	char    caller[] = "usimple_lock";


	if (!usld_lock_common_checks(l, caller)) {
		return;
	}

/*
 *	Note that we have a weird case where we are getting a lock when we are
 *	in the process of putting the system to sleep. We are running with no
 *	current threads, therefore we can't tell if we are trying to retake a lock
 *	we have or someone on the other processor has it.  Therefore we just
 *	ignore this test if the locking thread is 0.
 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock %p already locked (at %p) by",
		    caller, l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		    l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	/* NOTE(review): this disable/enable pair looks like a historical
	 * no-op preemption-level touch — confirm intent before removing. */
	mp_disable_preemption();
	mp_enable_preemption();
}
826 
827 
828 /*
829  *	Debug checks on a usimple_lock just after acquiring it.
830  *
831  *	Pre-emption has been disabled at this point,
832  *	so we are safe in using cpu_number.
833  */
834 void
usld_lock_post(usimple_lock_t l,pc_t pc)835 usld_lock_post(
836 	usimple_lock_t  l,
837 	pc_t            pc)
838 {
839 	unsigned int mycpu;
840 	char    caller[] = "successful usimple_lock";
841 
842 
843 	if (!usld_lock_common_checks(l, caller)) {
844 		return;
845 	}
846 
847 	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
848 		panic("%s:  lock %p became uninitialized",
849 		    caller, l);
850 	}
851 	if ((l->debug.state & USLOCK_TAKEN)) {
852 		panic("%s:  lock 0x%p became TAKEN by someone else",
853 		    caller, l);
854 	}
855 
856 	mycpu = (unsigned int)cpu_number();
857 	assert(mycpu <= UCHAR_MAX);
858 
859 	l->debug.lock_thread = (void *)current_thread();
860 	l->debug.state |= USLOCK_TAKEN;
861 	l->debug.lock_pc = pc;
862 	l->debug.lock_cpu = (unsigned char)mycpu;
863 }
864 
865 
866 /*
867  *	Debug checks on a usimple_lock just before
868  *	releasing it.  Note that the caller has not
869  *	yet released the hardware lock.
870  *
871  *	Preemption is still disabled, so there's
872  *	no problem using cpu_number.
873  */
874 void
usld_unlock(usimple_lock_t l,pc_t pc)875 usld_unlock(
876 	usimple_lock_t  l,
877 	pc_t            pc)
878 {
879 	unsigned int mycpu;
880 	char    caller[] = "usimple_unlock";
881 
882 
883 	if (!usld_lock_common_checks(l, caller)) {
884 		return;
885 	}
886 
887 	mycpu = cpu_number();
888 	assert(mycpu <= UCHAR_MAX);
889 
890 	if (!(l->debug.state & USLOCK_TAKEN)) {
891 		panic("%s:  lock 0x%p hasn't been taken",
892 		    caller, l);
893 	}
894 	if (l->debug.lock_thread != (void *) current_thread()) {
895 		panic("%s:  unlocking lock 0x%p, owned by thread %p",
896 		    caller, l, l->debug.lock_thread);
897 	}
898 	if (l->debug.lock_cpu != mycpu) {
899 		printf("%s:  unlocking lock 0x%p on cpu 0x%x",
900 		    caller, l, mycpu);
901 		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
902 		panic("%s", caller);
903 	}
904 
905 	l->debug.unlock_thread = l->debug.lock_thread;
906 	l->debug.lock_thread = INVALID_PC;
907 	l->debug.state &= ~USLOCK_TAKEN;
908 	l->debug.unlock_pc = pc;
909 	l->debug.unlock_cpu = (unsigned char)mycpu;
910 }
911 
912 
913 /*
914  *	Debug checks on a usimple_lock just before
915  *	attempting to acquire it.
916  *
917  *	Preemption isn't guaranteed to be disabled.
918  */
919 void
usld_lock_try_pre(usimple_lock_t l,__unused pc_t pc)920 usld_lock_try_pre(
921 	usimple_lock_t  l,
922 	__unused pc_t   pc)
923 {
924 	char    caller[] = "usimple_lock_try";
925 
926 	if (!usld_lock_common_checks(l, caller)) {
927 		return;
928 	}
929 }
930 
931 
932 /*
933  *	Debug checks on a usimple_lock just after
934  *	successfully attempting to acquire it.
935  *
936  *	Preemption has been disabled by the
937  *	lock acquisition attempt, so it's safe
938  *	to use cpu_number.
939  */
940 void
usld_lock_try_post(usimple_lock_t l,pc_t pc)941 usld_lock_try_post(
942 	usimple_lock_t  l,
943 	pc_t            pc)
944 {
945 	unsigned int mycpu;
946 	char    caller[] = "successful usimple_lock_try";
947 
948 	if (!usld_lock_common_checks(l, caller)) {
949 		return;
950 	}
951 
952 	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
953 		panic("%s:  lock 0x%p became uninitialized",
954 		    caller, l);
955 	}
956 	if ((l->debug.state & USLOCK_TAKEN)) {
957 		panic("%s:  lock 0x%p became TAKEN by someone else",
958 		    caller, l);
959 	}
960 
961 	mycpu = cpu_number();
962 	assert(mycpu <= UCHAR_MAX);
963 
964 	l->debug.lock_thread = (void *) current_thread();
965 	l->debug.state |= USLOCK_TAKEN;
966 	l->debug.lock_pc = pc;
967 	l->debug.lock_cpu = (unsigned char)mycpu;
968 }
969 #endif  /* USLOCK_DEBUG */
970 #if LCK_MTX_USE_ARCH
971 
972 /*
973  * Slow path routines for lck_mtx locking and unlocking functions.
974  *
975  * These functions were previously implemented in x86 assembly,
976  * and some optimizations are in place in this c code to obtain a compiled code
977  * as performant and compact as the assembly version.
978  *
979  * To avoid to inline these functions on the fast path, all functions directly called by
980  * the fast paths have the __attribute__((noinline)) specified. Also they are all implemented
981  * in such a way the fast path can tail call into them. In this way the return address
982  * does not need to be pushed on the caller stack and stack optimization can happen on the caller.
983  *
984  * Slow path code is structured in such a way there are no calls to functions that will return
985  * on the context of the caller function, i.e. all functions called are or tail call functions
986  * or inline functions. The number of arguments of the tail call functions are less then six,
987  * so that they can be passed over registers and do not need to be pushed on stack.
988  * This allows the compiler to not create a stack frame for the functions.
989  *
990  * __improbable and __probable are used to compile the slow path code in such a way
991  * the fast path case will be on a sequence of instructions with as less jumps as possible,
992  * to make this case the most optimized even if falling through the slow path.
993  */
994 
995 /*
996  * Intel lock invariants:
997  *
998  * lck_mtx_waiters: contains the count of threads currently in the mutex waitqueue
999  *
1000  * The lock owner is promoted to the max priority of all its waiters only if it
1001  * was a lower priority when it acquired or was an owner when a waiter waited.
1002  * Max priority is capped at MAXPRI_PROMOTE.
1003  *
1004  * The last waiter will not be promoted as it is woken up, but the last
1005  * lock owner may not have been the last thread to have been woken up depending on the
1006  * luck of the draw.  Therefore a last-owner may still have the promoted-on-wakeup
1007  * flag set.
1008  *
1009  * TODO: Figure out an algorithm for stopping a lock holder which is already at the right
1010  *       priority from dropping priority in the future without having to take thread lock
1011  *       on acquire.
1012  */
1013 
1014 /*
1015  *      Routine:        lck_mtx_alloc_init
1016  */
1017 lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t * grp,lck_attr_t * attr)1018 lck_mtx_alloc_init(
1019 	lck_grp_t       *grp,
1020 	lck_attr_t      *attr)
1021 {
1022 	lck_mtx_t *lck;
1023 
1024 	lck = zalloc(KT_LCK_MTX);
1025 	lck_mtx_init(lck, grp, attr);
1026 	return lck;
1027 }
1028 
1029 /*
1030  *      Routine:        lck_mtx_free
1031  */
1032 void
lck_mtx_free(lck_mtx_t * lck,lck_grp_t * grp)1033 lck_mtx_free(
1034 	lck_mtx_t       *lck,
1035 	lck_grp_t       *grp)
1036 {
1037 	lck_mtx_destroy(lck, grp);
1038 	zfree(KT_LCK_MTX, lck);
1039 }
1040 
1041 /*
1042  *      Routine:        lck_mtx_init
1043  */
1044 void
lck_mtx_init(lck_mtx_t * lck,lck_grp_t * grp,lck_attr_t * attr)1045 lck_mtx_init(
1046 	lck_mtx_t       *lck,
1047 	lck_grp_t       *grp,
1048 	lck_attr_t      *attr)
1049 {
1050 	if (attr == LCK_ATTR_NULL) {
1051 		attr = &lck_attr_default;
1052 	}
1053 
1054 	*lck = (lck_mtx_t){
1055 		.lck_mtx_grp = grp->lck_grp_attr_id,
1056 	};
1057 
1058 	if (__improbable((attr->lck_attr_val & LCK_ATTR_DEBUG) ||
1059 	    (grp->lck_grp_attr_id & LCK_GRP_ATTR_DEBUG))) {
1060 		lck->lck_mtx_profile = 1;
1061 	}
1062 
1063 	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
1064 }
1065 
1066 /*
1067  *      Routine:        lck_mtx_destroy
1068  */
void
lck_mtx_destroy(
	lck_mtx_t       *lck,
	lck_grp_t       *grp)
{
	/* Destroying an already-destroyed lock is a no-op. */
	if (lck->lck_mtx_state == LCK_MTX_TAG_DESTROYED) {
		return;
	}
#if MACH_LDEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 * Publish the destroyed tag with release semantics; later lock
	 * attempts detect the tag (see lck_mtx_destroyed/lck_mtx_try_destroyed).
	 */
	ordered_store_mtx_state_release(lck, LCK_MTX_TAG_DESTROYED);
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
1083 
1084 
#if DEVELOPMENT | DEBUG
/*
 * Out-of-line panic helper for an unlock attempted by a non-owner
 * thread (keeps the unlock fast path compact).
 */
__attribute__((noinline))
void
lck_mtx_owner_check_panic(
	lck_mtx_t       *lock)
{
	/*
	 * NOTE: the owner field is read without the interlock, so the
	 * resulting thread pointer may be stale; it is only formatted
	 * into the panic string, never dereferenced.
	 */
	thread_t owner = ctid_get_thread_unsafe(lock->lck_mtx_owner);
	panic("Mutex unlock attempted from non-owner thread. Owner=%p lock=%p", owner, lock);
}
#endif
1095 
1096 /*
1097  * Routine:     lck_mtx_unlock_slow
1098  *
1099  * Unlocks a mutex held by current thread.
1100  *
1101  * It will wake up waiters if necessary.
1102  *
1103  * Interlock can be held.
1104  */
__attribute__((noinline))
void
lck_mtx_unlock_slow(
	lck_mtx_t       *lock)
{
	thread_t        thread;
	uint32_t        state;

	state = ordered_load_mtx_state(lock);

	thread = current_thread();

#if DEVELOPMENT | DEBUG
	/* Unlocking a mutex one does not own is a fatal error. */
	if (__improbable(lock->lck_mtx_owner != thread->ctid)) {
		lck_mtx_owner_check_panic(lock);
	}
#endif

	/* check if it is held as a spinlock */
	if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
		/* spin mode: the interlock is already held, skip re-acquiring it */
		goto unlock;
	}

	/* mutex mode: take the interlock and clear the mutex-held bit */
	lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);

unlock:
	/* preemption disabled, interlock held and mutex not held */

	/* clear owner */
	ordered_store_mtx_owner(lock, 0);

	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		/* waiters present: wake one and release in the tail call */
		return lck_mtx_unlock_wakeup_tail(lock, state);
	}

	/* release interlock, promotion and clear spin flag */
	state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
	ordered_store_mtx_state_release(lock, state);           /* since I own the interlock, I don't need an atomic update */

	/* re-enable preemption */
	lck_mtx_unlock_finish_inline(lock, state);
}
1147 
1148 #define LCK_MTX_LCK_WAIT_CODE           0x20
1149 #define LCK_MTX_LCK_WAKEUP_CODE         0x21
1150 #define LCK_MTX_LCK_SPIN_CODE           0x22
1151 #define LCK_MTX_LCK_ACQUIRE_CODE        0x23
1152 #define LCK_MTX_LCK_DEMOTE_CODE         0x24
1153 
1154 /*
1155  * Routine:    lck_mtx_unlock_wakeup_tail
1156  *
1157  * Invoked on unlock when there is
1158  * contention, i.e. the assembly routine sees
1159  * that mutex->lck_mtx_waiters != 0
1160  *
 * neither the mutex nor the interlock is held
1162  *
1163  * Note that this routine might not be called if there are pending
1164  * waiters which have previously been woken up, and they didn't
1165  * end up boosting the old owner.
1166  *
1167  * assembly routine previously did the following to mutex:
1168  * (after saving the state in prior_lock_state)
1169  *      decremented lck_mtx_waiters if nonzero
1170  *
1171  * This function needs to be called as a tail call
1172  * to optimize the compiled code.
1173  */
__attribute__((noinline))
static void
lck_mtx_unlock_wakeup_tail(
	lck_mtx_t       *mutex,
	uint32_t        state)
{
	struct turnstile *ts;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	/* Look up the turnstile attached to this mutex. */
	ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/*
	 * Wake exactly one waiter; WAITQ_UPDATE_INHERITOR hands the
	 * turnstile inheritorship to the woken thread.  The wakeup must
	 * succeed: the caller observed LCK_MTX_WAITERS_MSK set while
	 * holding the interlock.
	 */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(mutex),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);

	/* Account for the woken waiter, then drop the spin flag and interlock. */
	state -= LCK_MTX_WAITER;
	state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
	ordered_store_mtx_state_release(mutex, state);

	assert(current_thread()->turnstile != NULL);

	/* Deferred turnstile cleanup, after the new lock state is published. */
	turnstile_cleanup();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	lck_mtx_unlock_finish_inline(mutex, state);
}
1210 
1211 /*
1212  * Routine:     lck_mtx_lock_acquire_x86
1213  *
1214  * Invoked on acquiring the mutex when there is
 * contention (i.e. the assembly routine sees that
 * mutex->lck_mtx_waiters != 0).
1217  *
1218  * mutex is owned...  interlock is held... preemption is disabled
1219  */
__attribute__((always_inline))
static void
lck_mtx_lock_acquire_inline(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);

	if (mutex->lck_mtx_waiters > 0) {
		/*
		 * Waiters remain: make ourselves the turnstile inheritor so
		 * we receive their priority push (see file-header invariants).
		 */
		if (ts == NULL) {
			ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
		}

		turnstile_update_inheritor(ts, current_thread(),
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	}

	/* Release the turnstile, whether passed in by the caller or prepared above. */
	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
	    trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
1250 
1251 void
lck_mtx_lock_acquire_x86(lck_mtx_t * mutex)1252 lck_mtx_lock_acquire_x86(
1253 	lck_mtx_t       *mutex)
1254 {
1255 	return lck_mtx_lock_acquire_inline(mutex, NULL);
1256 }
1257 
1258 /*
1259  * Tail call helpers for lock functions that perform
1260  * lck_mtx_lock_acquire followed by the caller's finish routine, to optimize
1261  * the caller's compiled code.
1262  */
1263 
__attribute__((noinline))
static void
lck_mtx_lock_acquire_tail(
	lck_mtx_t       *mutex,
	struct turnstile *ts)
{
	/*
	 * Called with the interlock held: inherit waiter priority, then
	 * finish the lock (drop interlock, turnstile cleanup) as a tail call.
	 */
	lck_mtx_lock_acquire_inline(mutex, ts);
	lck_mtx_lock_finish_inline_with_cleanup(mutex, ordered_load_mtx_state(mutex));
}
1273 
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
	lck_mtx_t       *mutex)
{
	/*
	 * Try-lock variant of the acquire tail: the lock is already owned
	 * by the time we are called, so this always reports success.
	 */
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));

	return TRUE;
}
1284 
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
	lck_mtx_t       *mutex)
{
	/* Spin-to-mutex conversion: inherit waiter priority, then finish. */
	lck_mtx_lock_acquire_inline(mutex, NULL);
	lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
}
1293 
1294 static void
lck_mtx_ilk_unlock(lck_mtx_t * mutex)1295 lck_mtx_ilk_unlock(
1296 	lck_mtx_t       *mutex)
1297 {
1298 	lck_mtx_ilk_unlock_inline(mutex, ordered_load_mtx_state(mutex));
1299 }
1300 
/*
 * Acquire the interlock, atomically setting xor_flags and clearing
 * and_flags in the same exchange.  Spins (with preemption enabled)
 * while the interlock or any xor_flags bit is already set.  Returns
 * with preemption disabled and *new_state holding the published state.
 */
static inline void
lck_mtx_interlock_lock_set_and_clear_flags(
	lck_mtx_t *mutex,
	uint32_t xor_flags,
	uint32_t and_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	for (;;) {
		/* have to wait for interlock to clear */
		while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
			cpu_pause();
			state = ordered_load_mtx_state(mutex);
		}
		prev = state;                                   /* prev contains snapshot for exchange */
		state |= LCK_MTX_ILOCKED_MSK | xor_flags;       /* pick up interlock */
		state &= ~and_flags;                            /* clear flags */

		/* Preemption must already be off when the interlock is owned. */
		disable_preemption();
		if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
			break;
		}
		/* Lost the race: allow preemption again, then retry. */
		enable_preemption();
		cpu_pause();
		state = ordered_load_mtx_state(mutex);
	}
	*new_state = state;
	return;
}
1332 
1333 static inline void
lck_mtx_interlock_lock_clear_flags(lck_mtx_t * mutex,uint32_t and_flags,uint32_t * new_state)1334 lck_mtx_interlock_lock_clear_flags(
1335 	lck_mtx_t *mutex,
1336 	uint32_t and_flags,
1337 	uint32_t *new_state)
1338 {
1339 	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, and_flags, new_state);
1340 }
1341 
1342 static inline void
lck_mtx_interlock_lock(lck_mtx_t * mutex,uint32_t * new_state)1343 lck_mtx_interlock_lock(
1344 	lck_mtx_t *mutex,
1345 	uint32_t *new_state)
1346 {
1347 	return lck_mtx_interlock_lock_set_and_clear_flags(mutex, 0, 0, new_state);
1348 }
1349 
/*
 * Single attempt to take the interlock while also setting or_flags.
 * Fails (returns 0) if the interlock or any or_flags bit is already
 * set, or if the cmpxchg loses a race.  On success returns 1 with
 * preemption disabled and *new_state updated to the published state.
 */
static inline int
lck_mtx_interlock_try_lock_set_flags(
	lck_mtx_t *mutex,
	uint32_t or_flags,
	uint32_t *new_state)
{
	uint32_t state, prev;
	state = *new_state;

	/* have to wait for interlock to clear */
	if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
		return 0;
	}
	prev = state;                                   /* prev contains snapshot for exchange */
	state |= LCK_MTX_ILOCKED_MSK | or_flags;        /* pick up interlock */
	disable_preemption();
	if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
		*new_state = state;
		return 1;
	}

	/* cmpxchg lost; caller may retry with a fresh state */
	enable_preemption();
	return 0;
}
1374 
/*
 * Slow path for a contended mutex: adaptive-spins for the owner first,
 * then blocks on the lock's turnstile and retries until acquired.
 * Returns with the mutex owned, the interlock released and preemption
 * re-enabled (via the finish/tail helpers).
 */
__attribute__((noinline))
static void
lck_mtx_lock_contended(
	lck_mtx_t *lock,
	bool       profile,
	boolean_t *first_miss)
{
	lck_mtx_spinwait_ret_type_t ret;
	uint32_t state;
	thread_t thread;
	struct turnstile *ts = NULL;

try_again:

	if (profile) {
		/* record the contention for lock-group statistics */
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, first_miss);
	}

	/* adaptive spin; may acquire the lock outright */
	ret = lck_mtx_lock_spinwait_x86(lock);
	state = ordered_load_mtx_state(lock);
	switch (ret) {
	case LCK_MTX_SPINWAIT_NO_SPIN:
		/*
		 * owner not on core, lck_mtx_lock_spinwait_x86 didn't even
		 * try to spin.
		 */
		/* just fall through case LCK_MTX_SPINWAIT_SPUN */
		OS_FALLTHROUGH;
	case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
	case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
	case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
	case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
		/*
		 * mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
		 * interlock not held
		 */
		lck_mtx_interlock_lock(lock, &state);
		assert(state & LCK_MTX_ILOCKED_MSK);

		if (state & LCK_MTX_MLOCKED_MSK) {
			/* still held as a mutex: block on the turnstile */
			if (profile) {
				LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
				    ret == LCK_MTX_SPINWAIT_NO_SPIN, first_miss);
			}
			lck_mtx_lock_wait_x86(lock, &ts);
			/*
			 * interlock is not held here.
			 */
			goto try_again;
		} else {
			/* grab the mutex */
			state |= LCK_MTX_MLOCKED_MSK;
			ordered_store_mtx_state_release(lock, state);
			thread = current_thread();
			ordered_store_mtx_owner(lock, thread->ctid);
		}

		break;
	case LCK_MTX_SPINWAIT_ACQUIRED:
		/*
		 * mutex has been acquired by lck_mtx_lock_spinwait_x86
		 * interlock is held and preemption disabled
		 * owner is set and mutex marked as locked
		 * statistics updated too
		 */
		break;
	default:
		panic("lck_mtx_lock_spinwait_x86 returned %d for mutex %p", ret, lock);
	}

	/*
	 * interlock is already acquired here
	 */

	/* mutex has been acquired */
	if (state & LCK_MTX_WAITERS_MSK) {
		/*
		 * lck_mtx_lock_acquire_tail will call
		 * turnstile_complete.
		 */
		return lck_mtx_lock_acquire_tail(lock, ts);
	}

	/* release any turnstile prepared while waiting */
	if (ts != NULL) {
		turnstile_complete_hash((uintptr_t)lock, TURNSTILE_KERNEL_MUTEX);
	}

	assert(current_thread()->turnstile != NULL);

	/* release the interlock */
	lck_mtx_lock_finish_inline_with_cleanup(lock, ordered_load_mtx_state(lock));
}
1467 
1468 /*
1469  * Helper noinline functions for calling
1470  * panic to optimize compiled code.
1471  */
1472 
/* Out-of-line panic helper; keeps the lock fast paths compact. */
__attribute__((noinline)) __abortlike
static void
lck_mtx_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
}
1480 
/*
 * Panic helper for the try-lock paths.  The return value exists only
 * to satisfy the boolean_t signature; panic() does not return.
 */
__attribute__((noinline))
static boolean_t
lck_mtx_try_destroyed(
	lck_mtx_t       *lock)
{
	panic("trying to interlock destroyed mutex (%p)", lock);
	return FALSE;
}
1489 
/*
 * Spin until the interlock is released so the lock can be retried.
 * Returns TRUE with *new_state refreshed once both the interlock and
 * mutex bits are clear; returns FALSE as soon as the mutex bit is
 * seen set (caller should take the blocking path instead).
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
			*new_state = state;
			return TRUE;
		}
		if (state & LCK_MTX_MLOCKED_MSK) {
			/* if it is held as mutex, just fail */
			return FALSE;
		}
	}
}
1511 
/*
 * Spin until the interlock clears so a try-lock can be attempted.
 * Returns FALSE as soon as the lock is seen held as a mutex or spin
 * lock (the try-lock should give up); returns TRUE with *new_state
 * refreshed once only the interlock was pending and it has cleared.
 */
__attribute__((always_inline))
static boolean_t
lck_mtx_try_lock_wait_interlock_to_clear(
	lck_mtx_t       *lock,
	uint32_t*        new_state)
{
	uint32_t state;

	for (;;) {
		cpu_pause();
		state = ordered_load_mtx_state(lock);
		if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
			/* if it is held as mutex or spin, just fail */
			return FALSE;
		}
		if (!(state & LCK_MTX_ILOCKED_MSK)) {
			*new_state = state;
			return TRUE;
		}
	}
}
1533 
1534 /*
1535  * Routine:	lck_mtx_lock_slow
1536  *
1537  * Locks a mutex for current thread.
1538  * If the lock is contended this function might
1539  * sleep.
1540  *
1541  * Called with interlock not held.
1542  */
__attribute__((noinline))
void
lck_mtx_lock_slow(
	lck_mtx_t       *lock)
{
	bool            profile;
	uint32_t        state;
	int             first_miss = 0;

	state = ordered_load_mtx_state(lock);
	profile = (state & LCK_MTX_PROFILE_MSK);

	if (__improbable(profile)) {
		if (state & LCK_MTX_SPIN_MSK) {
			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
			assert(state & LCK_MTX_ILOCKED_MSK);
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
	}

	/* is the interlock or mutex held */
	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
		/*
		 * Note: LCK_MTX_TAG_DESTROYED has
		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
		 */

		/* is the mutex already held */
		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
			/* no, must have been the mutex */
			return lck_mtx_lock_contended(lock, profile, &first_miss);
		}

		/* check to see if it is marked destroyed */
		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
			/* panics; does not return */
			lck_mtx_destroyed(lock);
		}
	}

	/* no - can't be DESTROYED or locked */
	/* spin on the interlock, or divert to the contended path if the mutex is taken */
	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
			return lck_mtx_lock_contended(lock, profile, &first_miss);
		}
	}

	/* lock and interlock acquired */

	/* record owner of mutex */
	ordered_store_mtx_owner(lock, current_thread()->ctid);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_lock_acquire_tail(lock, NULL);
	}

	/* release the interlock */
	lck_mtx_lock_finish_inline(lock, ordered_load_mtx_state(lock));
}
1605 
/*
 * Routine:     lck_mtx_try_lock_slow
 *
 * Tries to lock the mutex for the current thread without blocking.
 * Returns TRUE if acquired, FALSE otherwise; panics if the mutex is
 * marked destroyed.
 */
__attribute__((noinline))
boolean_t
lck_mtx_try_lock_slow(
	lck_mtx_t       *lock)
{
	bool            profile;
	uint32_t        state;
	int             first_miss = 0;

	state = ordered_load_mtx_state(lock);
	profile = (state & LCK_MTX_PROFILE_MSK);

	/* is the interlock or mutex held */
	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
		/*
		 * Note: LCK_MTX_TAG_DESTROYED has
		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
		 */

		/* is the mutex already held */
		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
			return FALSE;
		}

		/* check to see if it is marked destroyed */
		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
			/* panics; does not return */
			lck_mtx_try_destroyed(lock);
		}

		/* only the interlock is pending: spin briefly for it to clear */
		if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
			if (__improbable(profile)) {
				LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
			}
			return FALSE;
		}
	}

	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
		if (__improbable(profile)) {
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
		return FALSE;
	}

	/* lock and interlock acquired */

	/* record owner of mutex */
	ordered_store_mtx_owner(lock, current_thread()->ctid);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_try_lock_acquire_tail(lock);
	}

	/* release the interlock */
	lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));

	return TRUE;
}
1668 
/*
 * Routine:     lck_mtx_lock_spin_slow
 *
 * Acquires the mutex in spin mode (LCK_MTX_SPIN_MSK): on success it
 * returns with the interlock held and preemption disabled.  If the
 * lock is held as a full mutex, falls back to the blocking contended
 * path, which returns it as a regular mutex acquisition instead.
 */
__attribute__((noinline))
void
lck_mtx_lock_spin_slow(
	lck_mtx_t       *lock)
{
	bool            profile;
	uint32_t        state;
	int             first_miss = 0;

	state = ordered_load_mtx_state(lock);
	profile = (state & LCK_MTX_PROFILE_MSK);

	if (__improbable(profile)) {
		if (state & LCK_MTX_SPIN_MSK) {
			/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
			assert(state & LCK_MTX_ILOCKED_MSK);
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
	}

	/* is interlock or mutex held */
	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
		/*
		 * Note: LCK_MTX_TAG_DESTROYED has
		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
		 */

		/* is the mutex already held */
		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
			/* no, must have been the mutex */
			return lck_mtx_lock_contended(lock, profile, &first_miss);
		}

		/* check to see if it is marked destroyed */
		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
			/* panics; does not return */
			lck_mtx_destroyed(lock);
		}
	}

	bool acquired = true;

#if CONFIG_DTRACE
	uint64_t spin_start;

	/* try once up front so the dtrace spin probe only fires on contention */
	if (__probable(lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
		goto past_spin;
	}

	spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
#endif /* CONFIG_DTRACE */

	/* note - can't be DESTROYED or locked */
	/* note - preemption is not disabled while spinning */
	while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
		if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
			/* held as a full mutex: take the blocking path below */
			acquired = false;
			break;
		}
	}

	LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
#if CONFIG_DTRACE
past_spin:
#endif /* CONFIG_DTRACE */

	if (__improbable(!acquired)) {
		return lck_mtx_lock_contended(lock, profile, &first_miss);
	}

	/* lock as spinlock and interlock acquired */

	/* record owner of mutex */
	ordered_store_mtx_owner(lock, current_thread()->ctid);

	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp, true, profile);

	/* return with the interlock held and preemption disabled */
	return;
}
1748 
/*
 * Routine:     lck_mtx_try_lock_spin_slow
 *
 * Single attempt to take the mutex in spin mode; never blocks.
 * Returns TRUE with the interlock held and preemption disabled on
 * success, FALSE otherwise; panics if the mutex is marked destroyed.
 */
__attribute__((noinline))
boolean_t
lck_mtx_try_lock_spin_slow(
	lck_mtx_t       *lock)
{
	bool            profile;
	uint32_t        state;
	int             first_miss = 0;

	state = ordered_load_mtx_state(lock);
	profile = (state & LCK_MTX_PROFILE_MSK);

	/* is the interlock or mutex held */
	if (__improbable(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
		/*
		 * Note: LCK_MTX_TAG_DESTROYED has
		 *       LCK_MTX_ILOCKED_MSK and LCK_MTX_MLOCKED_MSK set.
		 */

		/* is the mutex already held */
		if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
			return FALSE;
		}

		/* check to see if it is marked destroyed */
		if (__improbable(state == LCK_MTX_TAG_DESTROYED)) {
			/* panics; does not return */
			lck_mtx_try_destroyed(lock);
		}
	}

	/* note - can't be DESTROYED or locked */
	if (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
		if (__improbable(profile)) {
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
		return FALSE;
	}

	/* lock and interlock acquired */

	/* record owner of mutex */
	ordered_store_mtx_owner(lock, current_thread()->ctid);

#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
#endif
	return TRUE;
}
1797 
/*
 * Routine:     lck_mtx_convert_spin
 *
 * Converts a spin-mode hold (interlock held, preemption disabled)
 * into a regular mutex hold owned by the current thread.  No-op if
 * the lock is already held as a mutex.
 */
__attribute__((noinline))
void
lck_mtx_convert_spin(
	lck_mtx_t       *lock)
{
	uint32_t state;

	state = ordered_load_mtx_state(lock);

	/* only the owner may convert its hold */
	assertf(lock->lck_mtx_owner == current_thread()->ctid,
	    "lock %p not owned by thread ctid %d (current owner %d)",
	    lock, current_thread()->ctid, lock->lck_mtx_owner);

	if (__improbable(state & LCK_MTX_MLOCKED_MSK)) {
		/* already owned as a mutex, just return */
		return;
	}

	/* spin-mode invariants: preemption off, interlock and spin bits set */
	assert(get_preemption_level() > 0);
	assert(state & LCK_MTX_ILOCKED_MSK);
	assert(state & LCK_MTX_SPIN_MSK);

	/*
	 * Check if there are waiters to
	 * inherit their priority.
	 */
	if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
		return lck_mtx_convert_spin_acquire_tail(lock);
	}

	lck_mtx_convert_spin_finish_inline(lock, ordered_load_mtx_state(lock));

	return;
}
1832 
1833 static inline boolean_t
lck_mtx_lock_grab_mutex(lck_mtx_t * lock)1834 lck_mtx_lock_grab_mutex(
1835 	lck_mtx_t       *lock)
1836 {
1837 	uint32_t state;
1838 
1839 	state = ordered_load_mtx_state(lock);
1840 
1841 	if (!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state)) {
1842 		return FALSE;
1843 	}
1844 
1845 	/* lock and interlock acquired */
1846 
1847 	/* record owner of mutex */
1848 	ordered_store_mtx_owner(lock, current_thread()->ctid);
1849 	return TRUE;
1850 }
1851 
1852 __attribute__((noinline))
1853 void
lck_mtx_assert(lck_mtx_t * lock,unsigned int type)1854 lck_mtx_assert(
1855 	lck_mtx_t       *lock,
1856 	unsigned int    type)
1857 {
1858 	thread_t thread;
1859 	uint32_t state;
1860 
1861 	thread = current_thread();
1862 	state = ordered_load_mtx_state(lock);
1863 
1864 	if (type == LCK_MTX_ASSERT_OWNED) {
1865 		if ((lock->lck_mtx_owner != thread->ctid) ||
1866 		    !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
1867 			panic("mutex (%p) not owned", lock);
1868 		}
1869 	} else {
1870 		assert(type == LCK_MTX_ASSERT_NOTOWNED);
1871 		if (lock->lck_mtx_owner == thread->ctid) {
1872 			panic("mutex (%p) owned", lock);
1873 		}
1874 	}
1875 }
1876 
1877 /*
1878  * Routine:     lck_mtx_lock_spinwait_x86
1879  *
1880  * Invoked trying to acquire a mutex when there is contention but
1881  * the holder is running on another processor. We spin for up to a maximum
1882  * time waiting for the lock to be released.
1883  *
1884  * Called with the interlock unlocked.
1885  * returns LCK_MTX_SPINWAIT_ACQUIRED if mutex acquired
1886  * returns LCK_MTX_SPINWAIT_SPUN if we spun
1887  * returns LCK_MTX_SPINWAIT_NO_SPIN if we didn't spin due to the holder not running
1888  */
1889 __attribute__((noinline))
1890 lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(lck_mtx_t * mutex)1891 lck_mtx_lock_spinwait_x86(
1892 	lck_mtx_t       *mutex)
1893 {
1894 	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
1895 	ctid_t          owner, prev_owner;
1896 	uint64_t        window_deadline, sliding_deadline, high_deadline;
1897 	uint64_t        spin_start, start_time, cur_time, avg_hold_time, bias, delta;
1898 	lck_mtx_spinwait_ret_type_t             retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
1899 	int             loopcount = 0;
1900 	int             total_hold_time_samples, window_hold_time_samples, unfairness;
1901 	uint            prev_owner_cpu;
1902 	bool            adjust;
1903 
1904 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1905 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
1906 	    mutex->lck_mtx_waiters, 0, 0);
1907 
1908 	spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
1909 	start_time = mach_absolute_time();
1910 
1911 	/*
1912 	 * window_deadline represents the "learning" phase.
1913 	 * The thread collects statistics about the lock during
1914 	 * window_deadline and then it makes a decision on whether to spin more
1915 	 * or block according to the concurrency behavior
1916 	 * observed.
1917 	 *
1918 	 * Every thread can spin at least low_MutexSpin.
1919 	 */
1920 	window_deadline = start_time + low_MutexSpin;
1921 	/*
1922 	 * Sliding_deadline is the adjusted spin deadline
1923 	 * computed after the "learning" phase.
1924 	 */
1925 	sliding_deadline = window_deadline;
1926 	/*
1927 	 * High_deadline is a hard deadline. No thread
1928 	 * can spin more than this deadline.
1929 	 */
1930 	if (high_MutexSpin >= 0) {
1931 		high_deadline = start_time + high_MutexSpin;
1932 	} else {
1933 		high_deadline = start_time + low_MutexSpin * real_ncpus;
1934 	}
1935 
1936 	/*
1937 	 * Do not know yet which is the owner cpu.
1938 	 * Initialize prev_owner_cpu with next cpu.
1939 	 */
1940 	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1941 	total_hold_time_samples = 0;
1942 	window_hold_time_samples = 0;
1943 	avg_hold_time = 0;
1944 	adjust = TRUE;
1945 	bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
1946 
1947 	prev_owner = mutex->lck_mtx_owner;
1948 
1949 	/*
1950 	 * Spin while:
1951 	 *   - mutex is locked, and
1952 	 *   - it's locked as a spin lock, and
1953 	 *   - owner is running on another processor, and
1954 	 *   - we haven't spun for long enough.
1955 	 */
1956 	do {
1957 		/*
1958 		 * Try to acquire the lock.
1959 		 */
1960 		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
1961 			retval = LCK_MTX_SPINWAIT_ACQUIRED;
1962 			break;
1963 		}
1964 
1965 		cur_time = mach_absolute_time();
1966 
1967 		/*
1968 		 * Never spin past high_deadline.
1969 		 */
1970 		if (cur_time >= high_deadline) {
1971 			retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
1972 			break;
1973 		}
1974 
1975 		/*
1976 		 * Check if owner is on core. If not block.
1977 		 */
1978 		owner = mutex->lck_mtx_owner;
1979 		if (owner) {
1980 			uint32_t i = prev_owner_cpu;
1981 			bool     owner_on_core = false;
1982 
1983 			disable_preemption();
1984 			owner = mutex->lck_mtx_owner;
1985 
1986 			/*
1987 			 * For scalability we want to check if the owner is on core
1988 			 * without locking the mutex interlock.
1989 			 * If we do not lock the mutex interlock, the owner that we see might be
1990 			 * invalid, so we cannot dereference it. Therefore we cannot check
1991 			 * any field of the thread to tell us if it is on core.
1992 			 * Check if the thread that is running on the other cpus matches the owner.
1993 			 */
1994 			if (owner) {
1995 				thread_t owner_thread = ctid_get_thread_unsafe(owner);
1996 
1997 				do {
1998 					if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner_thread)) {
1999 						owner_on_core = true;
2000 						break;
2001 					}
2002 					if (++i >= real_ncpus) {
2003 						i = 0;
2004 					}
2005 				} while (i != prev_owner_cpu);
2006 			}
2007 
2008 			enable_preemption();
2009 
2010 			if (owner == 0) {
2011 				/* nothing to do, we didn't find an owner */
2012 			} else if (owner_on_core) {
2013 				prev_owner_cpu = i;
2014 			} else {
2015 				prev_owner = owner;
2016 				owner = mutex->lck_mtx_owner;
2017 				if (owner == prev_owner) {
2018 					/*
2019 					 * Owner is not on core.
2020 					 * Stop spinning.
2021 					 */
2022 					if (loopcount == 0) {
2023 						retval = LCK_MTX_SPINWAIT_NO_SPIN;
2024 					} else {
2025 						retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
2026 					}
2027 					break;
2028 				}
2029 				/*
2030 				 * Fall through if the owner changed while we were scanning.
2031 				 * The new owner could potentially be on core, so loop
2032 				 * again.
2033 				 */
2034 			}
2035 		}
2036 
2037 		/*
2038 		 * Save how many times we see the owner changing.
2039 		 * We can roughly estimate the mutex hold
2040 		 * time and the fairness with that.
2041 		 */
2042 		if (owner != prev_owner) {
2043 			prev_owner = owner;
2044 			total_hold_time_samples++;
2045 			window_hold_time_samples++;
2046 		}
2047 
2048 		/*
2049 		 * Learning window expired.
2050 		 * Try to adjust the sliding_deadline.
2051 		 */
2052 		if (cur_time >= window_deadline) {
2053 			/*
2054 			 * If there was not contention during the window
2055 			 * stop spinning.
2056 			 */
2057 			if (window_hold_time_samples < 1) {
2058 				retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
2059 				break;
2060 			}
2061 
2062 			if (adjust) {
2063 				/*
2064 				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
2065 				 * but the lock is unfair, so let's try to estimate by how much.
2066 				 */
2067 				unfairness = total_hold_time_samples / real_ncpus;
2068 
2069 				if (unfairness == 0) {
2070 					/*
2071 					 * We observed the owner changing `total_hold_time_samples` times which
2072 					 * let us estimate the average hold time of this mutex for the duration
2073 					 * of the spin time.
2074 					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
2075 					 *
2076 					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
2077 					 */
2078 					delta = cur_time - start_time;
2079 					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
2080 				} else {
2081 					/*
2082 					 * In this case at least one of the other cpus was able to get the lock twice
2083 					 * while I was spinning.
2084 					 * We could spin longer but it won't necessarily help if the system is unfair.
2085 					 * Try to randomize the wait to reduce contention.
2086 					 *
2087 					 * We compute how much time we could potentially spin
2088 					 * and distribute it over the cpus.
2089 					 *
2090 					 * bias is an integer between 0 and real_ncpus.
2091 					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
2092 					 */
2093 					delta = high_deadline - cur_time;
2094 					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
2095 					adjust = FALSE;
2096 				}
2097 			}
2098 
2099 			window_deadline += low_MutexSpin;
2100 			window_hold_time_samples = 0;
2101 		}
2102 
		/*
		 * Stop spinning if we are past
		 * the adjusted deadline.
		 */
2107 		if (cur_time >= sliding_deadline) {
2108 			retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
2109 			break;
2110 		}
2111 
2112 		if (mutex->lck_mtx_owner != 0) {
2113 			cpu_pause();
2114 		}
2115 
2116 		loopcount++;
2117 	} while (TRUE);
2118 
2119 	LCK_MTX_ADAPTIVE_SPIN_END(mutex, mutex->lck_mtx_grp, spin_start);
2120 
2121 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
2122 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
2123 	    mutex->lck_mtx_waiters, retval, 0);
2124 
2125 	return retval;
2126 }
2127 
2128 
2129 
2130 /*
2131  * Routine:     lck_mtx_lock_wait_x86
2132  *
2133  * Invoked in order to wait on contention.
2134  *
2135  * Called with the interlock locked and
2136  * preemption disabled...
2137  * returns it unlocked and with preemption enabled
2138  *
2139  * lck_mtx_waiters is 1:1 with a wakeup needing to occur.
2140  *      A runnable waiter can exist between wait and acquire
2141  *      without a waiters count being set.
2142  *      This allows us to never make a spurious wakeup call.
2143  *
2144  * Priority:
2145  *      This avoids taking the thread lock if the owning thread is the same priority.
2146  *      This optimizes the case of same-priority threads contending on a lock.
2147  *      However, that allows the owning thread to drop in priority while holding the lock,
2148  *      because there is no state that the priority change can notice that
2149  *      says that the targeted thread holds a contended mutex.
2150  *
2151  *      One possible solution: priority changes could look for some atomic tag
2152  *      on the thread saying 'holding contended lock', and then set up a promotion.
2153  *      Needs a story for dropping that promotion - the last contended unlock
2154  *      has to notice that this has happened.
2155  */
__attribute__((noinline))
void
lck_mtx_lock_wait_x86(
	lck_mtx_t       *mutex,
	struct turnstile **ts)
{
	thread_t self = current_thread();
	uint64_t sleep_start;

	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);

	/* Resolve the owning thread from the compact thread id stored in the lock. */
	thread_t holder = ctid_get_thread(mutex->lck_mtx_owner);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder),
	    mutex->lck_mtx_waiters, 0, 0);
	sleep_start = LCK_MTX_BLOCK_BEGIN();

	/* Record this waiter; the interlock (held on entry) serializes this update. */
	mutex->lck_mtx_waiters++;

	/*
	 * lck_mtx_lock_wait_x86 might be called in a loop. Call prepare just once and
	 * reuse the same turnstile while looping; the matching turnstile complete will
	 * be called by lck_mtx_lock_contended when finally acquiring the lock.
	 */
	if (*ts == NULL) {
		*ts = turnstile_prepare_hash((uintptr_t)mutex, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	/*
	 * Propagate our priority to the current owner. The update is flagged
	 * TURNSTILE_DELAYED_UPDATE and is applied by the
	 * turnstile_update_inheritor_complete() call below, after the interlock
	 * has been dropped.
	 */
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	/* Queue ourselves on the turnstile's waitq before releasing the interlock. */
	waitq_assert_wait64(&turnstile->ts_waitq, LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(mutex);

	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	/* Block until the owner's unlock path wakes us. */
	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe(mutex->lck_mtx_owner)),
	    mutex->lck_mtx_waiters, 0, 0);

	LCK_MTX_BLOCK_END(mutex, mutex->lck_mtx_grp, sleep_start);
}
2203 
2204 /*
2205  *      Routine: kdp_lck_mtx_lock_spin_is_acquired
2206  *      NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2207  *      Returns: TRUE if lock is acquired.
2208  */
2209 boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t * lck)2210 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t     *lck)
2211 {
2212 	if (not_in_kdp) {
2213 		panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
2214 	}
2215 
2216 	if (lck->lck_mtx_ilocked || lck->lck_mtx_mlocked) {
2217 		return TRUE;
2218 	}
2219 
2220 	return FALSE;
2221 }
2222 
2223 void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)2224 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2225 {
2226 	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2227 	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2228 	waitinfo->owner   = thread_tid(ctid_get_thread(mutex->lck_mtx_owner));
2229 }
2230 
2231 #endif /* LCK_MTX_USE_ARCH */
2232 #if CONFIG_PV_TICKET
2233 __startup_func
2234 void
lck_init_pv(void)2235 lck_init_pv(void)
2236 {
2237 	uint32_t pvtck = 1;
2238 	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
2239 	if (pvtck == 0) {
2240 		return;
2241 	}
2242 	has_lock_pv = cpuid_vmm_present() &&
2243 	    (cpuid_vmm_get_kvm_features() & CPUID_KVM_FEATURE_PV_UNHALT) != 0;
2244 }
2245 STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
2246 #endif
2247