xref: /xnu-8020.140.41/osfmk/arm/locks_arm.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33  * Mellon University All Rights Reserved.
34  *
35  * Permission to use, copy, modify and distribute this software and its
36  * documentation is hereby granted, provided that both the copyright notice
37  * and this permission notice appear in all copies of the software,
38  * derivative works or modified versions, and any portions thereof, and that
39  * both notices appear in supporting documentation.
40  *
41  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42  * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43  * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44  *
45  * Carnegie Mellon requests users of this software to return to
46  *
47  * Software Distribution Coordinator  or  [email protected]
48  * School of Computer Science Carnegie Mellon University Pittsburgh PA
49  * 15213-3890
50  *
51  * any improvements or extensions that they make and grant Carnegie Mellon the
52  * rights to redistribute these changes.
53  */
54 /*
55  *	File:	kern/lock.c
56  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
57  *	Date:	1985
58  *
59  *	Locking primitives implementation
60  */
61 
62 #define LOCK_PRIVATE 1
63 
64 #include <mach_ldebug.h>
65 
66 #include <mach/machine/sdt.h>
67 
68 #include <kern/zalloc.h>
69 #include <kern/lock_stat.h>
70 #include <kern/locks.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/sched_hygiene.h>
75 #include <kern/sched_prim.h>
76 #include <kern/debug.h>
77 #include <kern/kcdata.h>
78 #include <kern/percpu.h>
79 #include <string.h>
80 #include <arm/cpu_internal.h>
81 #include <os/hash.h>
82 #include <arm/cpu_data.h>
83 
84 #include <arm/cpu_data_internal.h>
85 #include <arm/proc_reg.h>
86 #include <arm/smp.h>
87 #include <machine/atomic.h>
88 #include <machine/machine_cpu.h>
89 
90 #include <pexpert/pexpert.h>
91 
92 #include <sys/kdebug.h>
93 
94 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
95 
96 // Panic in tests that check lock usage correctness.
97 // These panics are undesirable when we are already in a panic or a debugger is running.
98 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
99 
100 #define ADAPTIVE_SPIN_ENABLE 0x1
101 
102 int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
103 
104 #define SPINWAIT_OWNER_CHECK_COUNT 4
105 
106 typedef enum {
107 	SPINWAIT_ACQUIRED,     /* Got the lock. */
108 	SPINWAIT_INTERLOCK,    /* Got the interlock, no owner, but caller must finish acquiring the lock. */
109 	SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
110 	SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
111 	SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
112 	SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
113 	SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
114 } spinwait_result_t;
115 
116 /* Forwards */
117 
118 extern unsigned int not_in_kdp;
119 
120 /*
121  *	We often want to know the addresses of the callers
122  *	of the various lock routines.  However, this information
123  *	is only used for debugging and statistics.
124  */
125 typedef void   *pc_t;
126 #define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
127 #define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
128 
129 #ifdef  lint
130 /*
131  *	Eliminate lint complaints about unused local pc variables.
132  */
133 #define OBTAIN_PC(pc, l) ++pc
134 #else                           /* lint */
135 #define OBTAIN_PC(pc, l)
136 #endif                          /* lint */
137 
138 
139 /*
140  *	Portable lock package implementation of usimple_locks.
141  */
142 
143 /*
144  * Owner thread pointer when lock held in spin mode
145  */
146 #define LCK_MTX_SPIN_TAG  0xfffffff0
147 
148 
149 #define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
150 #define interlock_try(lock)             hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
151 #define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
152 #define load_memory_barrier()   os_atomic_thread_fence(acquire)
153 
154 // Enforce program order of loads and stores.
155 #define ordered_load(target) \
156 	        os_atomic_load(target, compiler_acq_rel)
157 #define ordered_store(target, value) \
158 	        os_atomic_store(target, value, compiler_acq_rel)
159 
160 #define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
161 #define ordered_store_mtx(lock, value)  ordered_store(&(lock)->lck_mtx_data, (value))
162 #define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
163 #define ordered_store_hw(lock, value)   ordered_store(&(lock)->lock_data, (value))
164 #define ordered_load_bit(lock)                  ordered_load((lock))
165 #define ordered_store_bit(lock, value)  ordered_store((lock), (value))
166 
167 
168 // Prevent the compiler from reordering memory operations around this
169 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
170 
171 MACHINE_TIMEOUT32(lock_panic_timeout, "lock-panic",
172     0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
173 
174 #define NOINLINE                __attribute__((noinline))
175 
176 
177 #if __arm__
178 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
179 #else
180 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
181 #endif
182 
183 
184 #if __arm__
185 #define enable_fiq()            __asm__ volatile ("cpsie  f" ::: "memory");
186 #define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
187 #endif
188 
189 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
190 
191 KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
192 
193 KALLOC_TYPE_DEFINE(KT_LCK_MTX_EXT, lck_mtx_ext_t, KT_PRIV_ACCT);
194 
195 #pragma GCC visibility push(hidden)
196 /*
197  * atomic exchange API is a low level abstraction of the operations
198  * to atomically read, modify, and write a pointer.  This abstraction works
199  * for both Intel and ARMv8.1 compare and exchange atomic instructions as
200  * well as the ARM exclusive instructions.
201  *
202  * atomic_exchange_begin() - begin exchange and retrieve current value
203  * atomic_exchange_complete() - conclude an exchange
204  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
205  */
206 uint32_t
207 load_exclusive32(uint32_t *target, enum memory_order ord)
208 {
209 	uint32_t        value;
210 
211 #if __arm__
212 	if (_os_atomic_mo_has_release(ord)) {
213 		// Pre-load release barrier
214 		atomic_thread_fence(memory_order_release);
215 	}
216 	value = __builtin_arm_ldrex(target);
217 #else
218 	if (_os_atomic_mo_has_acquire(ord)) {
219 		value = __builtin_arm_ldaex(target);    // ldaxr
220 	} else {
221 		value = __builtin_arm_ldrex(target);    // ldxr
222 	}
223 #endif  // __arm__
224 	return value;
225 }
226 
227 boolean_t
228 store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
229 {
230 	boolean_t err;
231 
232 #if __arm__
233 	err = __builtin_arm_strex(value, target);
234 	if (_os_atomic_mo_has_acquire(ord)) {
235 		// Post-store acquire barrier
236 		atomic_thread_fence(memory_order_acquire);
237 	}
238 #else
239 	if (_os_atomic_mo_has_release(ord)) {
240 		err = __builtin_arm_stlex(value, target);       // stlxr
241 	} else {
242 		err = __builtin_arm_strex(value, target);       // stxr
243 	}
244 #endif  // __arm__
245 	return !err;
246 }
247 
248 uint32_t
249 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
250 {
251 	uint32_t        val;
252 
253 #if __ARM_ATOMICS_8_1
254 	ord = memory_order_relaxed;
255 #endif
256 	val = load_exclusive32(target, ord);
257 	*previous = val;
258 	return val;
259 }
260 
261 boolean_t
262 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
263 {
264 #if __ARM_ATOMICS_8_1
265 	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
266 #else
267 	(void)previous;         // Previous not needed, monitor is held
268 	return store_exclusive32(target, newval, ord);
269 #endif
270 }
271 
272 void
273 atomic_exchange_abort(void)
274 {
275 	os_atomic_clear_exclusive();
276 }
277 
278 boolean_t
279 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
280 {
281 	uint32_t                value, prev;
282 
283 	for (;;) {
284 		value = atomic_exchange_begin32(target, &prev, ord);
285 		if (value & test_mask) {
286 			if (wait) {
287 				wait_for_event();       // Wait with monitor held
288 			} else {
289 				atomic_exchange_abort();        // Clear exclusive monitor
290 			}
291 			return FALSE;
292 		}
293 		value |= set_mask;
294 		if (atomic_exchange_complete32(target, prev, value, ord)) {
295 			return TRUE;
296 		}
297 	}
298 }
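/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a hypothetical helper showing how a caller drives the
 * atomic_exchange_begin32()/atomic_exchange_complete32()/atomic_exchange_abort()
 * protocol described above, including the abort path that drops the
 * exclusive monitor when there is nothing to do.
 */
#if 0 /* example only */
static boolean_t
atomic_clear_bits32(uint32_t *target, uint32_t clear_mask, enum memory_order ord)
{
	uint32_t        value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if ((value & clear_mask) == 0) {
			atomic_exchange_abort();        // Nothing to clear, release the monitor
			return FALSE;
		}
		value &= ~clear_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
		/* Lost the reservation, retry from the top */
	}
}
#endif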
299 
300 #pragma GCC visibility pop
301 
302 #if SCHED_PREEMPTION_DISABLE_DEBUG
303 
304 uint64_t PERCPU_DATA(preemption_disable_max_mt);
305 
306 MACHINE_TIMEOUT_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);
307 
308 TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
309     "machine-timeouts",
310     "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
311     "sched_preemption_disable_debug_mode",
312     SCHED_HYGIENE_MODE_OFF,
313     TUNABLE_DT_CHECK_CHOSEN);
314 
315 static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;
316 
317 NOINLINE void
318 _prepare_preemption_disable_measurement(thread_t thread)
319 {
320 	if (thread->machine.inthandler_timestamp == 0) {
321 		/*
322 		 * Only prepare a measurement if not currently in an interrupt
323 		 * handler.
324 		 *
325 		 * We are only interested in the net duration of disabled
326 		 * preemption, that is: The time in which preemption was
327 		 * disabled, minus the intervals in which any (likely
328 		 * unrelated) interrupts were handled.
329 		 * ml_adjust_preemption_disable_time() will remove those
330 		 * intervals, however we also do not even start measuring
331 		 * preemption disablement if we are already within handling of
332 		 * an interrupt when preemption was disabled (the resulting
333 		 * net time would be 0).
334 		 *
335 		 * Interrupt handling duration is handled separately, and any
336 		 * long intervals of preemption disablement are counted
337 		 * towards that.
338 		 */
339 		thread->machine.preemption_disable_adj_mt = ml_get_speculative_timebase();
340 	}
341 }
342 
343 NOINLINE void
344 _collect_preemption_disable_measurement(thread_t thread)
345 {
346 	bool istate = ml_set_interrupts_enabled(false);
347 	/*
348 	 * Collect start time and current time with interrupts disabled.
349 	 * Otherwise an interrupt coming in after grabbing the timestamp
350 	 * could spuriously inflate the measurement, because it will
351 	 * adjust preemption_disable_adj_mt only after we already grabbed
352 	 * it.
353 	 *
354 	 * (Even worse if we collected the current time first: Then a
355 	 * subsequent interrupt could adjust preemption_disable_adj_mt to
356 	 * make the duration go negative after subtracting the already
357 	 * grabbed time. With interrupts disabled we don't care much about
358 	 * the order.)
359 	 */
360 
361 	uint64_t const mt = thread->machine.preemption_disable_adj_mt;
362 	uint64_t const now = ml_get_speculative_timebase();
363 
364 	os_compiler_barrier(acq_rel);
365 
366 	ml_set_interrupts_enabled(istate);
367 
368 	int64_t const duration = now - mt;
369 
370 
371 	uint64_t * const max_duration = PERCPU_GET(preemption_disable_max_mt);
372 
373 	if (__improbable(duration > *max_duration)) {
374 		*max_duration = duration;
375 	}
376 
377 	uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
378 	if (__improbable(threshold > 0 && duration >= threshold)) {
379 		if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
380 			panic("preemption disable timeout exceeded: %llu >= %llu timebase ticks", duration, threshold);
381 		}
382 
383 		DTRACE_SCHED1(mach_preemption_expired, uint64_t, duration);
384 		if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
385 			KDBG(sched_preemption_disable_debug_dbgid, duration);
386 		}
387 	}
388 
389 	thread->machine.preemption_disable_adj_mt = 0;
390 }
391 
392 /*
393  * Skip predicate for sched_preemption_disable, which would trigger
394  * spuriously when kprintf spam is enabled.
395  */
396 bool
397 kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
398 {
399 	bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
400 	return kprintf_spam_enabled;
401 }
402 
403 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
404 
405 /*
406  * To help _disable_preemption() inline everywhere with LTO,
407  * we keep these nice non-inlinable functions, as the panic()
408  * codegen setup is quite large and for weird reasons causes a frame.
409  */
410 __abortlike
411 static void
412 _disable_preemption_overflow(void)
413 {
414 	panic("Preemption count overflow");
415 }
416 
417 void
418 _disable_preemption(void)
419 {
420 	thread_t     thread = current_thread();
421 	unsigned int count  = thread->machine.preemption_count;
422 
423 	if (__improbable(++count == 0)) {
424 		_disable_preemption_overflow();
425 	}
426 
427 	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
428 
429 #if SCHED_PREEMPTION_DISABLE_DEBUG
430 
431 	/*
432 	 * Note that this is not the only place preemption gets disabled,
433 	 * it also gets modified on ISR and PPL entry/exit. Both of those
434 	 * events are treated specially, however, and because the
435 	 * increment/decrement are paired around their entry/exit, the
436 	 * collection here does not get out of sync.
437 	 */
438 
439 	if (count == 1 && sched_preemption_disable_debug_mode) {
440 		_prepare_preemption_disable_measurement(thread);
441 	}
442 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
443 }
444 
445 /*
446  * This variant of _disable_preemption() allows disabling preemption
447  * without taking measurements (and later potentially triggering
448  * actions on those).
449  *
450  * We do this through a separate variant because we do not want to
451  * disturb inlinability of _disable_preemption(). However, in order to
452  * also avoid code duplication, instead of repeating common code we
453  * simply call _disable_preemption() and explicitly abandon any taken
454  * measurement.
455  */
456 void
457 _disable_preemption_without_measurements(void)
458 {
459 	_disable_preemption();
460 
461 #if SCHED_PREEMPTION_DISABLE_DEBUG
462 	/*
463 	 * Abandon a potential preemption disable measurement. Useful for
464 	 * example for the idle thread, which would just spuriously
465 	 * trigger the threshold while actually idling, which we don't
466 	 * care about.
467 	 */
468 	thread_t t = current_thread();
469 	if (t->machine.preemption_disable_adj_mt != 0) {
470 		t->machine.preemption_disable_adj_mt = 0;
471 	}
472 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
473 }
474 
475 /*
476  * This function checks whether an AST_URGENT has been pended.
477  *
478  * It is called once preemption has been reenabled, which means the thread
479  * may have been preempted right before this was called, and when this function
480  * actually performs the check, we've changed CPU.
481  *
482  * This race is however benign: the point of AST_URGENT is to trigger a context
483  * switch, so if one happened, there's nothing left to check for, and AST_URGENT
484  * was cleared in the process.
485  *
486  * It follows that this check cannot have false negatives, which allows us
487  * to avoid fiddling with interrupt state for the vast majority of cases
488  * when the check will actually be negative.
489  */
490 static NOINLINE void
491 kernel_preempt_check(thread_t thread)
492 {
493 	long        state;
494 
495 #if __arm__
496 #define INTERRUPT_MASK PSR_IRQF
497 #else   // __arm__
498 #define INTERRUPT_MASK DAIF_IRQF
499 #endif  // __arm__
500 
501 	/* If interrupts are masked, we can't take an AST here */
502 	state = get_interrupts();
503 	if ((state & INTERRUPT_MASK) == 0) {
504 		disable_interrupts_noread();                    // Disable interrupts
505 
506 		/*
507 		 * Reload cpu_pending_ast: a context switch would cause it to change.
508 		 * Now that interrupts are disabled, this will debounce false positives.
509 		 */
510 		if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
511 #if __arm__
512 #if __ARM_USER_PROTECT__
513 			uintptr_t up = arm_user_protect_begin(thread);
514 #endif  // __ARM_USER_PROTECT__
515 			enable_fiq();
516 #endif  // __arm__
517 			ast_taken_kernel();                 // Handle urgent AST
518 #if __arm__
519 #if __ARM_USER_PROTECT__
520 			arm_user_protect_end(thread, up, TRUE);
521 #endif  // __ARM_USER_PROTECT__
522 			enable_interrupts();
523 			return;                             // Return early on arm only due to FIQ enabling
524 #endif  // __arm__
525 		}
526 		restore_interrupts(state);              // Enable interrupts
527 	}
528 }
529 
530 /*
531  * To help _enable_preemption() inline everywhere with LTO,
532  * we keep these nice non inlineable functions as the panic()
533  * codegen setup is quite large and for weird reasons causes a frame.
534  */
535 __abortlike
536 static void
537 _enable_preemption_underflow(void)
538 {
539 	panic("Preemption count underflow");
540 }
541 
542 void
543 _enable_preemption(void)
544 {
545 	thread_t     thread = current_thread();
546 	unsigned int count  = thread->machine.preemption_count;
547 
548 	if (__improbable(count == 0)) {
549 		_enable_preemption_underflow();
550 	}
551 	count -= 1;
552 
553 #if SCHED_PREEMPTION_DISABLE_DEBUG
554 	if (count == 0 && thread->machine.preemption_disable_adj_mt != 0) {
555 		_collect_preemption_disable_measurement(thread);
556 	}
557 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
558 
559 	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
560 	if (count == 0) {
561 		/*
562 		 * This check is racy and could load from another CPU's pending_ast mask,
563 		 * but as described above, this can't have false negatives.
564 		 */
565 		if (__improbable(thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT)) {
566 			kernel_preempt_check(thread);
567 		}
568 	}
569 
570 	os_compiler_barrier();
571 }
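/*
 * Illustrative sketch (editor's example, not part of the original file):
 * preemption disable/enable must always be paired and the count nests;
 * example_percpu_update and the per-CPU access it stands for are hypothetical.
 */
#if 0 /* example only */
static void
example_percpu_update(void)
{
	_disable_preemption();          // pin the thread to this CPU
	/* ... safely touch per-CPU state here; must not block ... */
	_enable_preemption();           // may handle a pending urgent AST on the way out
}
#endif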
572 
573 int
574 get_preemption_level(void)
575 {
576 	return current_thread()->machine.preemption_count;
577 }
578 
579 /*
580  *      Routine:        lck_spin_alloc_init
581  */
582 lck_spin_t     *
583 lck_spin_alloc_init(
584 	lck_grp_t * grp,
585 	lck_attr_t * attr)
586 {
587 	lck_spin_t *lck;
588 
589 	lck = zalloc(KT_LCK_SPIN);
590 	lck_spin_init(lck, grp, attr);
591 	return lck;
592 }
593 
594 /*
595  *      Routine:        lck_spin_free
596  */
597 void
598 lck_spin_free(
599 	lck_spin_t * lck,
600 	lck_grp_t * grp)
601 {
602 	lck_spin_destroy(lck, grp);
603 	zfree(KT_LCK_SPIN, lck);
604 }
605 
606 /*
607  *      Routine:        lck_spin_init
608  */
609 void
610 lck_spin_init(
611 	lck_spin_t * lck,
612 	lck_grp_t * grp,
613 	__unused lck_attr_t * attr)
614 {
615 	lck->type = LCK_SPIN_TYPE;
616 	hw_lock_init(&lck->hwlock);
617 	if (grp) {
618 		lck_grp_reference(grp, &grp->lck_grp_spincnt);
619 	}
620 }
621 
622 /*
623  * arm_usimple_lock is a lck_spin_t without a group or attributes
624  */
625 MARK_AS_HIBERNATE_TEXT void inline
626 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
627 {
628 	lck->type = LCK_SPIN_TYPE;
629 	hw_lock_init(&lck->hwlock);
630 }
631 
632 
633 /*
634  *      Routine:        lck_spin_lock
635  */
636 void
637 lck_spin_lock(lck_spin_t *lock)
638 {
639 #if     DEVELOPMENT || DEBUG
640 	if (lock->type != LCK_SPIN_TYPE) {
641 		panic("Invalid spinlock %p", lock);
642 	}
643 #endif  // DEVELOPMENT || DEBUG
644 	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
645 }
646 
647 void
648 lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
649 {
650 #pragma unused(grp)
651 #if     DEVELOPMENT || DEBUG
652 	if (lock->type != LCK_SPIN_TYPE) {
653 		panic("Invalid spinlock %p", lock);
654 	}
655 #endif  // DEVELOPMENT || DEBUG
656 	hw_lock_lock(&lock->hwlock, grp);
657 }
658 
659 /*
660  *      Routine:        lck_spin_lock_nopreempt
661  */
662 void
663 lck_spin_lock_nopreempt(lck_spin_t *lock)
664 {
665 #if     DEVELOPMENT || DEBUG
666 	if (lock->type != LCK_SPIN_TYPE) {
667 		panic("Invalid spinlock %p", lock);
668 	}
669 #endif  // DEVELOPMENT || DEBUG
670 	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
671 }
672 
673 void
674 lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
675 {
676 #pragma unused(grp)
677 #if     DEVELOPMENT || DEBUG
678 	if (lock->type != LCK_SPIN_TYPE) {
679 		panic("Invalid spinlock %p", lock);
680 	}
681 #endif  // DEVELOPMENT || DEBUG
682 	hw_lock_lock_nopreempt(&lock->hwlock, grp);
683 }
684 
685 /*
686  *      Routine:        lck_spin_try_lock
687  */
688 int
689 lck_spin_try_lock(lck_spin_t *lock)
690 {
691 	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
692 }
693 
694 int
695 lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
696 {
697 #pragma unused(grp)
698 	return hw_lock_try(&lock->hwlock, grp);
699 }
700 
701 /*
702  *      Routine:        lck_spin_try_lock_nopreempt
703  */
704 int
705 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
706 {
707 	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
708 }
709 
710 int
711 lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
712 {
713 #pragma unused(grp)
714 	return hw_lock_try_nopreempt(&lock->hwlock, grp);
715 }
716 
717 /*
718  *      Routine:        lck_spin_unlock
719  */
720 void
721 lck_spin_unlock(lck_spin_t *lock)
722 {
723 #if     DEVELOPMENT || DEBUG
724 	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
725 		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
726 	}
727 	if (lock->type != LCK_SPIN_TYPE) {
728 		panic("Invalid spinlock type %p", lock);
729 	}
730 #endif  // DEVELOPMENT || DEBUG
731 	hw_lock_unlock(&lock->hwlock);
732 }
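/*
 * Illustrative sketch (editor's example, not part of the original file):
 * lck_spin_lock() returns with preemption disabled, so the critical section
 * must be short and must not block before the matching lck_spin_unlock().
 */
#if 0 /* example only */
static void
example_spinlock_usage(lck_spin_t *sl)
{
	lck_spin_lock(sl);
	/* ... short, non-blocking critical section ... */
	lck_spin_unlock(sl);
}
#endif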
733 
734 /*
735  *      Routine:        lck_spin_unlock_nopreempt
736  */
737 void
738 lck_spin_unlock_nopreempt(lck_spin_t *lock)
739 {
740 #if     DEVELOPMENT || DEBUG
741 	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
742 		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
743 	}
744 	if (lock->type != LCK_SPIN_TYPE) {
745 		panic("Invalid spinlock type %p", lock);
746 	}
747 #endif  // DEVELOPMENT || DEBUG
748 	hw_lock_unlock_nopreempt(&lock->hwlock);
749 }
750 
751 /*
752  *      Routine:        lck_spin_destroy
753  */
754 void
755 lck_spin_destroy(
756 	lck_spin_t * lck,
757 	lck_grp_t * grp)
758 {
759 	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
760 		return;
761 	}
762 	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
763 	if (grp) {
764 		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
765 	}
766 }
767 
768 /*
769  * Routine: kdp_lck_spin_is_acquired
770  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
771  */
772 boolean_t
773 kdp_lck_spin_is_acquired(lck_spin_t *lck)
774 {
775 	if (not_in_kdp) {
776 		panic("panic: spinlock acquired check done outside of kernel debugger");
777 	}
778 	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
779 }
780 
781 /*
782  *	Initialize a usimple_lock.
783  *
784  *	No change in preemption state.
785  */
786 void
787 usimple_lock_init(
788 	usimple_lock_t l,
789 	unsigned short tag)
790 {
791 	simple_lock_init((simple_lock_t) l, tag);
792 }
793 
794 
795 /*
796  *	Acquire a usimple_lock.
797  *
798  *	Returns with preemption disabled.  Note
799  *	that the hw_lock routines are responsible for
800  *	maintaining preemption state.
801  */
802 void
803 (usimple_lock)(
804 	usimple_lock_t l
805 	LCK_GRP_ARG(lck_grp_t *grp))
806 {
807 	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
808 }
809 
810 
811 extern void     sync(void);
812 
813 /*
814  *	Release a usimple_lock.
815  *
816  *	Returns with preemption enabled.  Note
817  *	that the hw_lock routines are responsible for
818  *	maintaining preemption state.
819  */
820 void
821 (usimple_unlock)(
822 	usimple_lock_t l)
823 {
824 	simple_unlock((simple_lock_t)l);
825 }
826 
827 
828 /*
829  *	Conditionally acquire a usimple_lock.
830  *
831  *	On success, returns with preemption disabled.
832  *	On failure, returns with preemption in the same state
833  *	as when first invoked.  Note that the hw_lock routines
834  *	are responsible for maintaining preemption state.
835  *
836  *	XXX No stats are gathered on a miss; I preserved this
837  *	behavior from the original assembly-language code, but
838  *	doesn't it make sense to log misses?  XXX
839  */
840 unsigned
841 int
842 (usimple_lock_try)(
843 	usimple_lock_t l
844 	LCK_GRP_ARG(lck_grp_t *grp))
845 {
846 	return simple_lock_try((simple_lock_t) l, grp);
847 }
848 
849 /*
850  * The C portion of the mutex package.  These routines are only invoked
851  * if the optimized assembler routines can't do the work.
852  */
853 
854 /*
855  *      Routine:        lck_mtx_alloc_init
856  */
857 lck_mtx_t      *
858 lck_mtx_alloc_init(
859 	lck_grp_t * grp,
860 	lck_attr_t * attr)
861 {
862 	lck_mtx_t      *lck;
863 
864 	lck = zalloc(KT_LCK_MTX);
865 	lck_mtx_init(lck, grp, attr);
866 	return lck;
867 }
868 
869 /*
870  *      Routine:        lck_mtx_free
871  */
872 void
873 lck_mtx_free(
874 	lck_mtx_t * lck,
875 	lck_grp_t * grp)
876 {
877 	lck_mtx_destroy(lck, grp);
878 	zfree(KT_LCK_MTX, lck);
879 }
880 
881 /*
882  *      Routine:        lck_mtx_init
883  */
884 void
885 lck_mtx_init(
886 	lck_mtx_t * lck,
887 	lck_grp_t * grp,
888 	lck_attr_t * attr)
889 {
890 	lck_mtx_ext_t  *lck_ext = NULL;
891 
892 	if (attr == LCK_ATTR_NULL) {
893 		attr = &LockDefaultLckAttr;
894 	}
895 #ifdef  BER_XXX
896 	if (attr->lck_attr_val & LCK_ATTR_DEBUG) {
897 		lck_ext = zalloc(KT_LCK_MTX_EXT);
898 	}
899 #endif
900 	lck_mtx_init_ext(lck, lck_ext, grp, attr);
901 }
902 
903 /*
904  *      Routine:        lck_mtx_init_ext
905  */
906 void
907 lck_mtx_init_ext(
908 	lck_mtx_t * lck,
909 	lck_mtx_ext_t * lck_ext __unused,
910 	lck_grp_t * grp,
911 	lck_attr_t * attr)
912 {
913 	if (attr == LCK_ATTR_NULL) {
914 		attr = &LockDefaultLckAttr;
915 	}
916 
917 	*lck = (lck_mtx_t){
918 		.lck_mtx_type = LCK_MTX_TYPE,
919 	};
920 
921 #if LOCKS_INDIRECT_ALLOW
922 	if (__improbable(lck_ext && (attr->lck_attr_val & LCK_ATTR_DEBUG))) {
923 		*lck_ext = (lck_mtx_ext_t){
924 			.lck_mtx_deb.type = MUTEX_TAG,
925 			.lck_mtx_grp = grp,
926 			.lck_mtx = *lck,
927 		};
928 		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
929 		lck->lck_mtx_ptr = lck_ext;
930 	}
931 #endif /* LOCKS_INDIRECT_ALLOW */
932 
933 	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
934 }
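/*
 * Illustrative lifecycle sketch (editor's example, not part of the original
 * file): a client allocates a mutex against a lock group, takes and drops it,
 * then frees it.  "my_grp" is a hypothetical lck_grp_t created elsewhere.
 */
#if 0 /* example only */
static void
example_mutex_lifecycle(lck_grp_t *my_grp)
{
	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);

	lck_mtx_lock(m);
	/* ... critical section; blocking is allowed ... */
	lck_mtx_unlock(m);

	lck_mtx_free(m, my_grp);        // destroys the mutex and returns it to its zone
}
#endif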
935 
936 /* The slow versions */
937 static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
938 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
939 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
940 
941 /* The adaptive spin function */
942 static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
943 
944 /*
945  *	Routine:	lck_mtx_verify
946  *
947  *	Verify if a mutex is valid
948  */
949 static inline void
950 lck_mtx_verify(lck_mtx_t *lock)
951 {
952 	if (lock->lck_mtx_type != LCK_MTX_TYPE) {
953 		panic("Invalid mutex %p", lock);
954 	}
955 #if     DEVELOPMENT || DEBUG
956 	if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
957 		panic("Mutex destroyed %p", lock);
958 	}
959 #endif  /* DEVELOPMENT || DEBUG */
960 }
961 
962 /*
963  *	Routine:	lck_mtx_check_preemption
964  *
965  *	Verify preemption is enabled when attempting to acquire a mutex.
966  */
967 
968 static inline void
969 lck_mtx_check_preemption(lck_mtx_t *lock)
970 {
971 #if     DEVELOPMENT || DEBUG
972 	if (current_cpu_datap()->cpu_hibernate) {
973 		return;
974 	}
975 
976 	int pl = get_preemption_level();
977 
978 	if (pl != 0) {
979 		panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
980 	}
981 #else
982 	(void)lock;
983 #endif
984 }
985 
986 /*
987  *	Routine:	lck_mtx_lock
988  */
989 void
990 lck_mtx_lock(lck_mtx_t *lock)
991 {
992 	thread_t        thread;
993 
994 	lck_mtx_verify(lock);
995 	lck_mtx_check_preemption(lock);
996 	thread = current_thread();
997 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
998 	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
999 #if     CONFIG_DTRACE
1000 		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
1001 #endif /* CONFIG_DTRACE */
1002 		return;
1003 	}
1004 	lck_mtx_lock_contended(lock, thread, FALSE);
1005 }
1006 
1007 /*
1008  *       This is the slow version of mutex locking.
1009  */
1010 static void NOINLINE
1011 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
1012 {
1013 	thread_t                holding_thread;
1014 	uintptr_t               state;
1015 	int                     waiters = 0;
1016 	spinwait_result_t       sw_res;
1017 	struct turnstile        *ts = NULL;
1018 
1019 	/* Loop waiting until I see that the mutex is unowned */
1020 	for (;;) {
1021 		sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
1022 		interlocked = FALSE;
1023 
1024 		switch (sw_res) {
1025 		case SPINWAIT_ACQUIRED:
1026 			if (ts != NULL) {
1027 				interlock_lock(lock);
1028 				turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1029 				interlock_unlock(lock);
1030 			}
1031 			goto done;
1032 		case SPINWAIT_INTERLOCK:
1033 			goto set_owner;
1034 		default:
1035 			break;
1036 		}
1037 
1038 		state = ordered_load_mtx(lock);
1039 		holding_thread = LCK_MTX_STATE_TO_THREAD(state);
1040 		if (holding_thread == NULL) {
1041 			break;
1042 		}
1043 		ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
1044 		lck_mtx_lock_wait(lock, holding_thread, &ts);
1045 		/* returns interlock unlocked */
1046 	}
1047 
1048 set_owner:
1049 	/* Hooray, I'm the new owner! */
1050 	state = ordered_load_mtx(lock);
1051 
1052 	if (state & ARM_LCK_WAITERS) {
1053 		/* Skip lck_mtx_lock_acquire if there are no waiters. */
1054 		waiters = lck_mtx_lock_acquire(lock, ts);
1055 		/*
1056 		 * lck_mtx_lock_acquire will call
1057 		 * turnstile_complete
1058 		 */
1059 	} else {
1060 		if (ts != NULL) {
1061 			turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1062 		}
1063 	}
1064 
1065 	state = LCK_MTX_THREAD_TO_STATE(thread);
1066 	if (waiters != 0) {
1067 		state |= ARM_LCK_WAITERS;
1068 	}
1069 	state |= LCK_ILOCK;                             // Preserve interlock
1070 	ordered_store_mtx(lock, state); // Set ownership
1071 	interlock_unlock(lock);                 // Release interlock, enable preemption
1072 
1073 done:
1074 	load_memory_barrier();
1075 
1076 	assert(thread->turnstile != NULL);
1077 
1078 	if (ts != NULL) {
1079 		turnstile_cleanup();
1080 	}
1081 
1082 #if CONFIG_DTRACE
1083 	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
1084 #endif /* CONFIG_DTRACE */
1085 }
1086 
1087 /*
1088  * Routine: lck_mtx_lock_spinwait_arm
1089  *
1090  * Invoked trying to acquire a mutex when there is contention but
1091  * the holder is running on another processor. We spin for up to a maximum
1092  * time waiting for the lock to be released.
1093  */
1094 static spinwait_result_t
1095 lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
1096 {
1097 	int                     has_interlock = (int)interlocked;
1098 	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1099 	thread_t        owner, prev_owner;
1100 	uint64_t        window_deadline, sliding_deadline, high_deadline;
1101 	uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
1102 	int             loopcount = 0;
1103 	uint            i, prev_owner_cpu;
1104 	int             total_hold_time_samples, window_hold_time_samples, unfairness;
1105 	bool            owner_on_core, adjust;
1106 	uintptr_t       state, new_state, waiters;
1107 	spinwait_result_t       retval = SPINWAIT_DID_SPIN_HIGH_THR;
1108 
1109 	if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
1110 		if (!has_interlock) {
1111 			interlock_lock(lock);
1112 		}
1113 
1114 		return SPINWAIT_DID_NOT_SPIN;
1115 	}
1116 
1117 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1118 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
1119 
1120 	start_time = mach_absolute_time();
1121 	/*
1122 	 * The thread collects statistics about the lock until
1123 	 * window_deadline and then decides whether to keep spinning
1124 	 * or block, according to the concurrency behavior
1125 	 * observed.
1126 	 * observed.
1127 	 *
1128 	 * Every thread can spin at least low_MutexSpin.
1129 	 */
1130 	window_deadline = start_time + low_MutexSpin;
1131 	/*
1132 	 * Sliding_deadline is the adjusted spin deadline
1133 	 * computed after the "learning" phase.
1134 	 */
1135 	sliding_deadline = window_deadline;
1136 	/*
1137 	 * High_deadline is a hard deadline. No thread
1138 	 * can spin more than this deadline.
1139 	 */
1140 	if (high_MutexSpin >= 0) {
1141 		high_deadline = start_time + high_MutexSpin;
1142 	} else {
1143 		high_deadline = start_time + low_MutexSpin * real_ncpus;
1144 	}
1145 
1146 	/*
1147 	 * We do not yet know which cpu is the owner.
1148 	 * Initialize prev_owner_cpu with the next cpu.
1149 	 */
1150 	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1151 	total_hold_time_samples = 0;
1152 	window_hold_time_samples = 0;
1153 	avg_hold_time = 0;
1154 	adjust = TRUE;
1155 	bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
1156 
1157 	/* Snoop the lock state */
1158 	state = ordered_load_mtx(lock);
1159 	owner = LCK_MTX_STATE_TO_THREAD(state);
1160 	prev_owner = owner;
1161 
1162 	if (has_interlock) {
1163 		if (owner == NULL) {
1164 			retval = SPINWAIT_INTERLOCK;
1165 			goto done_spinning;
1166 		} else {
1167 			/*
1168 			 * We are holding the interlock, so
1169 			 * we can safely dereference owner.
1170 			 */
1171 			if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
1172 				retval = SPINWAIT_DID_NOT_SPIN;
1173 				goto done_spinning;
1174 			}
1175 		}
1176 		interlock_unlock(lock);
1177 		has_interlock = 0;
1178 	}
1179 
1180 	/*
1181 	 * Spin while:
1182 	 *   - mutex is locked, and
1183 	 *   - it's locked as a spin lock, and
1184 	 *   - owner is running on another processor, and
1185 	 *   - we haven't spun for long enough.
1186 	 */
1187 	do {
1188 		/*
1189 		 * Try to acquire the lock.
1190 		 */
1191 		owner = LCK_MTX_STATE_TO_THREAD(state);
1192 		if (owner == NULL) {
1193 			waiters = state & ARM_LCK_WAITERS;
1194 			if (waiters) {
1195 				/*
1196 				 * preserve the waiter bit
1197 				 * and try to acquire the interlock.
1198 				 * Note: we will successfully acquire
1199 				 * the interlock only if we can also
1200 				 * acquire the lock.
1201 				 */
1202 				new_state = ARM_LCK_WAITERS | LCK_ILOCK;
1203 				has_interlock = 1;
1204 				retval = SPINWAIT_INTERLOCK;
1205 				disable_preemption();
1206 			} else {
1207 				new_state = LCK_MTX_THREAD_TO_STATE(thread);
1208 				retval = SPINWAIT_ACQUIRED;
1209 			}
1210 
1211 			/*
1212 			 * The cmpxchg will succeed only if the lock
1213 			 * is not owned (doesn't have an owner set)
1214 			 * and it is not interlocked.
1215 			 * It will not fail if there are waiters.
1216 			 */
1217 			if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
1218 			    waiters, new_state, &state, acquire)) {
1219 				goto done_spinning;
1220 			} else {
1221 				if (waiters) {
1222 					has_interlock = 0;
1223 					enable_preemption();
1224 				}
1225 			}
1226 		}
1227 
1228 		cur_time = mach_absolute_time();
1229 
1230 		/*
1231 		 * Never spin past high_deadline.
1232 		 */
1233 		if (cur_time >= high_deadline) {
1234 			retval = SPINWAIT_DID_SPIN_HIGH_THR;
1235 			break;
1236 		}
1237 
1238 		/*
1239 		 * Check if the owner is on core. If not, block.
1240 		 */
1241 		owner = LCK_MTX_STATE_TO_THREAD(state);
1242 		if (owner) {
1243 			i = prev_owner_cpu;
1244 			owner_on_core = FALSE;
1245 
1246 			disable_preemption();
1247 			state = ordered_load_mtx(lock);
1248 			owner = LCK_MTX_STATE_TO_THREAD(state);
1249 
1250 			/*
1251 			 * For scalability we want to check if the owner is on core
1252 			 * without locking the mutex interlock.
1253 			 * If we do not lock the mutex interlock, the owner that we see might be
1254 			 * invalid, so we cannot dereference it. Therefore we cannot check
1255 			 * any field of the thread to tell us if it is on core.
1256 			 * Instead, check whether the thread currently running on one of the other cpus matches the owner.
1257 			 */
1258 			if (owner) {
1259 				do {
1260 					cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
1261 					if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
1262 						owner_on_core = TRUE;
1263 						break;
1264 					}
1265 					if (++i >= real_ncpus) {
1266 						i = 0;
1267 					}
1268 				} while (i != prev_owner_cpu);
1269 				enable_preemption();
1270 
1271 				if (owner_on_core) {
1272 					prev_owner_cpu = i;
1273 				} else {
1274 					prev_owner = owner;
1275 					state = ordered_load_mtx(lock);
1276 					owner = LCK_MTX_STATE_TO_THREAD(state);
1277 					if (owner == prev_owner) {
1278 						/*
1279 						 * Owner is not on core.
1280 						 * Stop spinning.
1281 						 */
1282 						if (loopcount == 0) {
1283 							retval = SPINWAIT_DID_NOT_SPIN;
1284 						} else {
1285 							retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
1286 						}
1287 						break;
1288 					}
1289 					/*
1290 					 * Fall through if the owner changed while we were scanning.
1291 					 * The new owner could potentially be on core, so loop
1292 					 * again.
1293 					 */
1294 				}
1295 			} else {
1296 				enable_preemption();
1297 			}
1298 		}
1299 
1300 		/*
1301 		 * Save how many times we see the owner changing.
1302 		 * We can roughly estimate the mutex hold
1303 		 * time and the fairness from that.
1304 		 */
1305 		if (owner != prev_owner) {
1306 			prev_owner = owner;
1307 			total_hold_time_samples++;
1308 			window_hold_time_samples++;
1309 		}
1310 
1311 		/*
1312 		 * Learning window expired.
1313 		 * Try to adjust the sliding_deadline.
1314 		 */
1315 		if (cur_time >= window_deadline) {
1316 			/*
1317 			 * If there was no contention during the window,
1318 			 * stop spinning.
1319 			 */
1320 			if (window_hold_time_samples < 1) {
1321 				retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
1322 				break;
1323 			}
1324 
1325 			if (adjust) {
1326 				/*
1327 				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
1328 				 * but the lock is unfair, so let's try to estimate by how much.
1329 				 */
1330 				unfairness = total_hold_time_samples / real_ncpus;
1331 
1332 				if (unfairness == 0) {
1333 					/*
1334 					 * We observed the owner changing `total_hold_time_samples` times which
1335 					 * let us estimate the average hold time of this mutex for the duration
1336 					 * of the spin time.
1337 					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
1338 					 *
1339 					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
1340 					 */
1341 					delta = cur_time - start_time;
1342 					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
1343 				} else {
1344 					/*
1345 					 * In this case at least one of the other cpus was able to get the lock twice
1346 					 * while I was spinning.
1347 					 * We could spin longer but it won't necessarily help if the system is unfair.
1348 					 * Try to randomize the wait to reduce contention.
1349 					 *
1350 					 * We compute how much time we could potentially spin
1351 					 * and distribute it over the cpus.
1352 					 *
1353 					 * bias is an integer between 0 and real_ncpus.
1354 					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
1355 					 */
1356 					delta = high_deadline - cur_time;
1357 					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
1358 					adjust = FALSE;
1359 				}
1360 			}
1361 
1362 			window_deadline += low_MutexSpin;
1363 			window_hold_time_samples = 0;
1364 		}
1365 
1366 		/*
1367 		 * Stop spinning if we are past
1368 		 * the adjusted deadline.
1369 		 */
1370 		if (cur_time >= sliding_deadline) {
1371 			retval = SPINWAIT_DID_SPIN_SLIDING_THR;
1372 			break;
1373 		}
1374 
1375 		/*
1376 		 * We want to arm the monitor for wfe,
1377 		 * so load the lock exclusively.
1378 		 *
1379 		 * NOTE:
1380 		 * we rely on the fact that wfe will
1381 		 * eventually return even if the cache line
1382 		 * is not modified. This way we will keep
1383 		 * looping and checking if the deadlines expired.
1384 		 */
1385 		state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
1386 		owner = LCK_MTX_STATE_TO_THREAD(state);
1387 		if (owner != NULL) {
1388 			wait_for_event();
1389 			state = ordered_load_mtx(lock);
1390 		} else {
1391 			atomic_exchange_abort();
1392 		}
1393 
1394 		loopcount++;
1395 	} while (TRUE);
1396 
1397 done_spinning:
1398 #if     CONFIG_DTRACE
1399 	/*
1400 	 * Note that we record a different probe id depending on whether
1401 	 * this is a direct or indirect mutex.  This allows us to
1402 	 * penalize only lock groups that have debug/stats enabled
1403 	 * with dtrace processing if desired.
1404 	 */
1405 #if LOCKS_INDIRECT_ALLOW
1406 	if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
1407 		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ADAPTIVE_SPIN, lock,
1408 		    mach_absolute_time() - start_time);
1409 	} else {
1410 		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ADAPTIVE_SPIN, lock,
1411 		    mach_absolute_time() - start_time);
1412 	}
1413 #else
1414 	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ADAPTIVE_SPIN, lock,
1415 	    mach_absolute_time() - start_time);
1416 #endif /* LOCKS_INDIRECT_ALLOW */
1417 	/* The lockstat acquire event is recorded by the caller. */
1418 #endif
1419 
1420 	state = ordered_load_mtx(lock);
1421 
1422 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1423 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
1424 	if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
1425 		/* We must own either the lock or the interlock on return. */
1426 		interlock_lock(lock);
1427 	}
1428 
1429 	return retval;
1430 }
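/*
 * Worked example of the sliding-deadline math above (editor's note with
 * illustrative numbers only).  Assume real_ncpus = 4, start_time = 0, and the
 * first learning window expires at cur_time ~= low_MutexSpin:
 *
 *   - If the owner changed total_hold_time_samples = 2 times,
 *     unfairness = 2 / 4 = 0, so the fair estimate applies:
 *       sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples
 *                        = 0 + (low_MutexSpin * 3) / 2 = 1.5 * low_MutexSpin
 *
 *   - If instead total_hold_time_samples = 8, unfairness = 8 / 4 = 2, so the
 *     remaining spin budget is spread across cpus with bias in [0, real_ncpus):
 *       sliding_deadline = cur_time + ((high_deadline - cur_time) * bias) / 4
 */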
1431 
1432 
1433 /*
1434  *	Common code for mutex locking as spinlock
1435  */
1436 static inline void
1437 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
1438 {
1439 	uintptr_t       state;
1440 #if CONFIG_DTRACE
1441 	bool stat_enabled = false;
1442 	uint64_t start_time = 0;
1443 
1444 	if (interlock_try(lock)) {
1445 		goto interlock_locked;
1446 	}
1447 
1448 #if LOCKS_INDIRECT_ALLOW
1449 	bool indirect = (lock->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
1450 
1451 	if ((lockstat_probemap[LS_LCK_MTX_LOCK_SPIN_SPIN] && !indirect) ||
1452 	    (lockstat_probemap[LS_LCK_MTX_EXT_LOCK_SPIN_SPIN] && indirect))
1453 #else
1454 	if (lockstat_probemap[LS_LCK_MTX_LOCK_SPIN_SPIN])
1455 #endif /* LOCKS_INDIRECT_ALLOW */
1456 	{
1457 		stat_enabled = true;
1458 		start_time = mach_absolute_time();
1459 	}
1460 #endif /* CONFIG_DTRACE */
1461 
1462 	interlock_lock(lock);
1463 
1464 #if CONFIG_DTRACE
1465 	if (stat_enabled) {
1466 #if LOCKS_INDIRECT_ALLOW
1467 		if (indirect) {
1468 			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN_SPIN, lock,
1469 			    mach_absolute_time() - start_time);
1470 		} else
1471 #endif /* LOCKS_INDIRECT_ALLOW */
1472 		{
1473 			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_SPIN, lock,
1474 			    mach_absolute_time() - start_time);
1475 		}
1476 	}
1477 
1478 interlock_locked:
1479 #endif /* CONFIG_DTRACE */
1480 
1481 	state = ordered_load_mtx(lock);
1482 	if (LCK_MTX_STATE_TO_THREAD(state)) {
1483 		if (allow_held_as_mutex) {
1484 			lck_mtx_lock_contended(lock, current_thread(), TRUE);
1485 		} else {
1486 			// "Always" variants can never block. If the lock is held and blocking is not allowed
1487 			// then someone is mixing always and non-always calls on the same lock, which is
1488 			// forbidden.
1489 			panic("Attempting to block on a lock taken as spin-always %p", lock);
1490 		}
1491 		return;
1492 	}
1493 	state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
1494 	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
1495 	ordered_store_mtx(lock, state);
1496 	load_memory_barrier();
1497 
1498 #if     CONFIG_DTRACE
1499 	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
1500 #endif /* CONFIG_DTRACE */
1501 }
1502 
1503 /*
1504  *	Routine:	lck_mtx_lock_spin
1505  */
1506 void
1507 lck_mtx_lock_spin(lck_mtx_t *lock)
1508 {
1509 	lck_mtx_check_preemption(lock);
1510 	lck_mtx_lock_spin_internal(lock, TRUE);
1511 }
1512 
1513 /*
1514  *	Routine:	lck_mtx_lock_spin_always
1515  */
1516 void
1517 lck_mtx_lock_spin_always(lck_mtx_t *lock)
1518 {
1519 	lck_mtx_lock_spin_internal(lock, FALSE);
1520 }
1521 
1522 /*
1523  *	Routine:	lck_mtx_try_lock
1524  */
1525 boolean_t
1526 lck_mtx_try_lock(lck_mtx_t *lock)
1527 {
1528 	thread_t        thread = current_thread();
1529 
1530 	lck_mtx_verify(lock);
1531 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
1532 	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
1533 #if     CONFIG_DTRACE
1534 		LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
1535 #endif /* CONFIG_DTRACE */
1536 		return TRUE;
1537 	}
1538 	return lck_mtx_try_lock_contended(lock, thread);
1539 }
1540 
1541 static boolean_t NOINLINE
1542 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
1543 {
1544 	thread_t        holding_thread;
1545 	uintptr_t       state;
1546 	int             waiters;
1547 
1548 	if (!interlock_try(lock)) {
1549 		return FALSE;
1550 	}
1551 	state = ordered_load_mtx(lock);
1552 	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
1553 	if (holding_thread) {
1554 		interlock_unlock(lock);
1555 		return FALSE;
1556 	}
1557 	waiters = lck_mtx_lock_acquire(lock, NULL);
1558 	state = LCK_MTX_THREAD_TO_STATE(thread);
1559 	if (waiters != 0) {
1560 		state |= ARM_LCK_WAITERS;
1561 	}
1562 	state |= LCK_ILOCK;                             // Preserve interlock
1563 	ordered_store_mtx(lock, state); // Set ownership
1564 	interlock_unlock(lock);                 // Release interlock, enable preemption
1565 	load_memory_barrier();
1566 
1567 	turnstile_cleanup();
1568 
1569 #if     CONFIG_DTRACE
1570 	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
1571 #endif /* CONFIG_DTRACE */
1572 
1573 	return TRUE;
1574 }
1575 
1576 static inline boolean_t
1577 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
1578 {
1579 	uintptr_t       state;
1580 
1581 	if (!interlock_try(lock)) {
1582 		return FALSE;
1583 	}
1584 	state = ordered_load_mtx(lock);
1585 	if (LCK_MTX_STATE_TO_THREAD(state)) {
1586 		// Lock is held as mutex
1587 		if (allow_held_as_mutex) {
1588 			interlock_unlock(lock);
1589 		} else {
1590 			// "Always" variants can never block. If the lock is held as a normal mutex
1591 			// then someone is mixing always and non-always calls on the same lock, which is
1592 			// forbidden.
1593 			panic("Spin-mutex held as full mutex %p", lock);
1594 		}
1595 		return FALSE;
1596 	}
1597 	state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
1598 	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
1599 	ordered_store_mtx(lock, state);
1600 	load_memory_barrier();
1601 
1602 #if     CONFIG_DTRACE
1603 	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_SPIN_ACQUIRE, lock, 0);
1604 #endif /* CONFIG_DTRACE */
1605 	return TRUE;
1606 }
1607 
1608 /*
1609  *	Routine: lck_mtx_try_lock_spin
1610  */
1611 boolean_t
1612 lck_mtx_try_lock_spin(lck_mtx_t *lock)
1613 {
1614 	return lck_mtx_try_lock_spin_internal(lock, TRUE);
1615 }
1616 
1617 /*
1618  *	Routine: lck_mtx_try_lock_spin_always
1619  */
1620 boolean_t
1621 lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
1622 {
1623 	return lck_mtx_try_lock_spin_internal(lock, FALSE);
1624 }
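/*
 * Illustrative sketch (editor's example, not part of the original file):
 * the try-lock variants return FALSE instead of blocking, so callers
 * typically fall back to other work when the mutex is contended.
 */
#if 0 /* example only */
static boolean_t
example_try_lock_usage(lck_mtx_t *m)
{
	if (!lck_mtx_try_lock(m)) {
		return FALSE;           // contended; caller retries later or takes another path
	}
	/* ... critical section ... */
	lck_mtx_unlock(m);
	return TRUE;
}
#endif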
1625 
1626 
1627 
1628 /*
1629  *	Routine:	lck_mtx_unlock
1630  */
1631 void
1632 lck_mtx_unlock(lck_mtx_t *lock)
1633 {
1634 	thread_t        thread = current_thread();
1635 	uintptr_t       state;
1636 	boolean_t       ilk_held = FALSE;
1637 
1638 	lck_mtx_verify(lock);
1639 
1640 	state = ordered_load_mtx(lock);
1641 	if (state & LCK_ILOCK) {
1642 		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
1643 			ilk_held = TRUE;        // Interlock is held by (presumably) this thread
1644 		}
1645 		goto slow_case;
1646 	}
1647 	// Locked as a mutex
1648 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
1649 	    LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
1650 #if     CONFIG_DTRACE
1651 		LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
1652 #endif /* CONFIG_DTRACE */
1653 		return;
1654 	}
1655 slow_case:
1656 	lck_mtx_unlock_contended(lock, thread, ilk_held);
1657 }
1658 
1659 static void NOINLINE
1660 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
1661 {
1662 	uintptr_t       state;
1663 	boolean_t               cleanup = FALSE;
1664 
1665 	if (ilk_held) {
1666 		state = ordered_load_mtx(lock);
1667 	} else {
1668 		interlock_lock(lock);
1669 		state = ordered_load_mtx(lock);
1670 		if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
1671 			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
1672 		}
1673 		if (state & ARM_LCK_WAITERS) {
1674 			if (lck_mtx_unlock_wakeup(lock, thread)) {
1675 				state = ARM_LCK_WAITERS;
1676 			} else {
1677 				state = 0;
1678 			}
1679 			cleanup = TRUE;
1680 			goto unlock;
1681 		}
1682 	}
1683 	state &= ARM_LCK_WAITERS;   /* Clear state, retain waiters bit */
1684 unlock:
1685 	state |= LCK_ILOCK;
1686 	ordered_store_mtx(lock, state);
1687 	interlock_unlock(lock);
1688 	if (cleanup) {
1689 		/*
1690 		 * Do not do any turnstile operations outside of this block.
1691 		 * lock/unlock is called at an early stage of boot with a single thread,
1692 		 * when the turnstile is not yet initialized.
1693 		 * Even without contention we can come through the slow path
1694 		 * if the mutex is acquired as a spin lock.
1695 		 */
1696 		turnstile_cleanup();
1697 	}
1698 
1699 #if     CONFIG_DTRACE
1700 	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
1701 #endif /* CONFIG_DTRACE */
1702 }
1703 
1704 /*
1705  *	Routine:	lck_mtx_assert
1706  */
1707 void
1708 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
1709 {
1710 	thread_t        thread, holder;
1711 	uintptr_t       state;
1712 
1713 	state = ordered_load_mtx(lock);
1714 	holder = LCK_MTX_STATE_TO_THREAD(state);
1715 	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
1716 		// Lock is held in spin mode, owner is unknown.
1717 		return; // Punt
1718 	}
1719 	thread = current_thread();
1720 	if (type == LCK_MTX_ASSERT_OWNED) {
1721 		if (thread != holder) {
1722 			panic("lck_mtx_assert(): mutex (%p) not owned by current thread", lock);
1723 		}
1724 	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
1725 		if (thread == holder) {
1726 			panic("lck_mtx_assert(): mutex (%p) owned by current thread", lock);
1727 		}
1728 	} else {
1729 		panic("lck_mtx_assert(): invalid arg (%u)", type);
1730 	}
1731 }
1732 
1733 /*
1734  *	Routine:	lck_mtx_ilk_unlock
1735  */
1736 boolean_t
1737 lck_mtx_ilk_unlock(lck_mtx_t *lock)
1738 {
1739 	interlock_unlock(lock);
1740 	return TRUE;
1741 }
1742 
1743 /*
1744  *	Routine:	lck_mtx_convert_spin
1745  *
1746  *	Convert a mutex held for spin into a held full mutex
1747  */
1748 void
1749 lck_mtx_convert_spin(lck_mtx_t *lock)
1750 {
1751 	thread_t        thread = current_thread();
1752 	uintptr_t       state;
1753 	int                     waiters;
1754 
1755 	state = ordered_load_mtx(lock);
1756 	if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
1757 		return;         // Already owned as mutex, return
1758 	}
1759 	if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
1760 		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
1761 	}
1762 	state &= ~(LCK_MTX_THREAD_MASK);                // Clear the spin tag
1763 	ordered_store_mtx(lock, state);
1764 	waiters = lck_mtx_lock_acquire(lock, NULL);   // Acquire to manage priority boosts
1765 	state = LCK_MTX_THREAD_TO_STATE(thread);
1766 	if (waiters != 0) {
1767 		state |= ARM_LCK_WAITERS;
1768 	}
1769 	state |= LCK_ILOCK;
1770 	ordered_store_mtx(lock, state);                 // Set ownership
1771 	interlock_unlock(lock);                                 // Release interlock, enable preemption
1772 	turnstile_cleanup();
1773 }
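/*
 * Illustrative sketch (editor's example, not part of the original file):
 * a caller grabs the mutex in spin mode for a quick, non-blocking check and
 * converts it to a full mutex only when it must do heavier, blockable work.
 */
#if 0 /* example only */
static void
example_spin_then_convert(lck_mtx_t *m)
{
	lck_mtx_lock_spin(m);           // held as spin: interlock held, owner is the spin tag
	/* ... short, non-blocking inspection ... */
	lck_mtx_convert_spin(m);        // now held as a full mutex; blocking is allowed
	/* ... longer critical section ... */
	lck_mtx_unlock(m);
}
#endif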
1774 
1775 
1776 /*
1777  *      Routine:        lck_mtx_destroy
1778  */
1779 void
1780 lck_mtx_destroy(
1781 	lck_mtx_t * lck,
1782 	lck_grp_t * grp)
1783 {
1784 	if (lck->lck_mtx_type != LCK_MTX_TYPE) {
1785 		panic("Destroying invalid mutex %p", lck);
1786 	}
1787 	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
1788 		panic("Destroying previously destroyed lock %p", lck);
1789 	}
1790 	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
1791 	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
1792 	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
1793 }
1794 
1795 /*
1796  *	Routine:	lck_spin_assert
1797  */
1798 void
1799 lck_spin_assert(lck_spin_t *lock, unsigned int type)
1800 {
1801 	thread_t        thread, holder;
1802 	uintptr_t       state;
1803 
1804 	if (lock->type != LCK_SPIN_TYPE) {
1805 		panic("Invalid spinlock %p", lock);
1806 	}
1807 
1808 	state = lock->lck_spin_data;
1809 	holder = (thread_t)(state & ~LCK_ILOCK);
1810 	thread = current_thread();
1811 	if (type == LCK_ASSERT_OWNED) {
1812 		if (holder == 0) {
1813 			panic("Lock not owned %p = %lx", lock, state);
1814 		}
1815 		if (holder != thread) {
1816 			panic("Lock not owned by current thread %p = %lx", lock, state);
1817 		}
1818 		if ((state & LCK_ILOCK) == 0) {
1819 			panic("Lock bit not set %p = %lx", lock, state);
1820 		}
1821 	} else if (type == LCK_ASSERT_NOTOWNED) {
1822 		if (holder != 0) {
1823 			if (holder == thread) {
1824 				panic("Lock owned by current thread %p = %lx", lock, state);
1825 			}
1826 		}
1827 	} else {
1828 		panic("lck_spin_assert(): invalid arg (%u)", type);
1829 	}
1830 }
1831 
1832 /*
1833  * Routine: kdp_lck_mtx_lock_spin_is_acquired
1834  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1835  */
1836 boolean_t
1837 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
1838 {
1839 	uintptr_t       state;
1840 
1841 	if (not_in_kdp) {
1842 		panic("panic: spinlock acquired check done outside of kernel debugger");
1843 	}
1844 	state = ordered_load_mtx(lck);
1845 	if (state == LCK_MTX_TAG_DESTROYED) {
1846 		return FALSE;
1847 	}
1848 	if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
1849 		return TRUE;
1850 	}
1851 	return FALSE;
1852 }
1853 
1854 void
1855 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
1856 {
1857 	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
1858 	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
1859 	uintptr_t state   = ordered_load_mtx(mutex);
1860 	thread_t holder   = LCK_MTX_STATE_TO_THREAD(state);
1861 	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
1862 		waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
1863 	} else {
1864 		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
1865 #if LOCKS_INDIRECT_ALLOW
1866 		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
1867 #endif /* LOCKS_INDIRECT_ALLOW */
1868 		waitinfo->owner = thread_tid(holder);
1869 	}
1870 }
1871