1 /*
2  * Copyright (c) 2007-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33  * Mellon University All Rights Reserved.
34  *
35  * Permission to use, copy, modify and distribute this software and its
36  * documentation is hereby granted, provided that both the copyright notice
37  * and this permission notice appear in all copies of the software,
38  * derivative works or modified versions, and any portions thereof, and that
39  * both notices appear in supporting documentation.
40  *
41  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42  * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43  * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44  *
45  * Carnegie Mellon requests users of this software to return to
46  *
47  * Software Distribution Coordinator  or  [email protected]
48  * School of Computer Science Carnegie Mellon University Pittsburgh PA
49  * 15213-3890
50  *
51  * any improvements or extensions that they make and grant Carnegie Mellon the
52  * rights to redistribute these changes.
53  */
54 /*
55  *	File:	kern/lock.c
56  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
57  *	Date:	1985
58  *
59  *	Locking primitives implementation
60  */
61 
62 #define LOCK_PRIVATE 1
63 
64 #include <mach_ldebug.h>
65 
66 #include <mach/machine/sdt.h>
67 
68 #include <kern/zalloc.h>
69 #include <kern/lock_stat.h>
70 #include <kern/locks.h>
71 #include <kern/misc_protos.h>
72 #include <kern/thread.h>
73 #include <kern/processor.h>
74 #include <kern/sched_hygiene.h>
75 #include <kern/sched_prim.h>
76 #include <kern/debug.h>
77 #include <kern/kcdata.h>
78 #include <kern/percpu.h>
79 #include <string.h>
80 #include <arm/cpu_internal.h>
81 #include <os/hash.h>
82 #include <arm/cpu_data.h>
83 
84 #include <arm/cpu_data_internal.h>
85 #include <arm/proc_reg.h>
86 #include <arm/smp.h>
87 #include <machine/atomic.h>
88 #include <machine/machine_cpu.h>
89 
90 #include <pexpert/pexpert.h>
91 
92 #include <sys/kdebug.h>
93 
94 #define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
95 
96 // Panic in tests that check lock usage correctness
97 // These are undesirable when in a panic or when a debugger is running.
98 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
99 
100 #define ADAPTIVE_SPIN_ENABLE 0x1
101 
102 int lck_mtx_adaptive_spin_mode = ADAPTIVE_SPIN_ENABLE;
103 
104 #define SPINWAIT_OWNER_CHECK_COUNT 4
105 
106 typedef enum {
107 	SPINWAIT_ACQUIRED,     /* Got the lock. */
108 	SPINWAIT_INTERLOCK,    /* Got the interlock, no owner, but caller must finish acquiring the lock. */
109 	SPINWAIT_DID_SPIN_HIGH_THR, /* Got the interlock, spun, but failed to get the lock. */
110 	SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Got the interlock, spun, but failed to get the lock. */
111 	SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Got the interlock, spun, but failed to get the lock. */
112 	SPINWAIT_DID_SPIN_SLIDING_THR,/* Got the interlock, spun, but failed to get the lock. */
113 	SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
114 } spinwait_result_t;
115 
116 #if CONFIG_DTRACE
117 extern machine_timeout32_t dtrace_spin_threshold;
118 #endif
119 
120 /* Forwards */
121 
122 extern unsigned int not_in_kdp;
123 
124 /*
125  *	We often want to know the addresses of the callers
126  *	of the various lock routines.  However, this information
127  *	is only used for debugging and statistics.
128  */
129 typedef void   *pc_t;
130 #define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
131 #define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)
132 
133 #ifdef  lint
134 /*
135  *	Eliminate lint complaints about unused local pc variables.
136  */
137 #define OBTAIN_PC(pc, l) ++pc
138 #else                           /* lint */
139 #define OBTAIN_PC(pc, l)
140 #endif                          /* lint */
141 
142 
143 /*
144  *	Portable lock package implementation of usimple_locks.
145  */
146 
147 /*
148  * Owner thread pointer when lock held in spin mode
149  */
150 #define LCK_MTX_SPIN_TAG  0xfffffff0
151 
152 
153 #define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
154 #define interlock_try(lock)             hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT, LCK_GRP_NULL)
155 #define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
156 #define load_memory_barrier()   os_atomic_thread_fence(acquire)
157 
158 // Enforce program order of loads and stores.
159 #define ordered_load(target) \
160 	        os_atomic_load(target, compiler_acq_rel)
161 #define ordered_store(target, value) \
162 	        os_atomic_store(target, value, compiler_acq_rel)
163 
164 #define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data)
165 #define ordered_store_mtx(lock, value)  ordered_store(&(lock)->lck_mtx_data, (value))
166 #define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data)
167 #define ordered_store_hw(lock, value)   ordered_store(&(lock)->lock_data, (value))
168 #define ordered_load_bit(lock)                  ordered_load((lock))
169 #define ordered_store_bit(lock, value)  ordered_store((lock), (value))
170 
171 
172 // Prevent the compiler from reordering memory operations around this
173 #define compiler_memory_fence() __asm__ volatile ("" ::: "memory")
174 
175 MACHINE_TIMEOUT32(lock_panic_timeout, "lock-panic",
176     0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
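/*
 * Editorial note (not part of the original source): arithmetic check of the
 * constant above.  0xc00000 = 12,582,912 timebase ticks; with a 24 MHz
 * oscillator that is 12,582,912 / 24,000,000 ~= 0.524 s, i.e. the ~524 ms
 * quoted in the inline comment.
 */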
177 
178 #define NOINLINE                __attribute__((noinline))
179 
180 
181 #if __arm__
182 #define interrupts_disabled(mask) (mask & PSR_INTMASK)
183 #else
184 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
185 #endif
186 
187 
188 #if __arm__
189 #define enable_fiq()            __asm__ volatile ("cpsie  f" ::: "memory");
190 #define enable_interrupts()     __asm__ volatile ("cpsie if" ::: "memory");
191 #endif
192 
193 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
194 
195 KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);
196 
197 KALLOC_TYPE_DEFINE(KT_LCK_MTX_EXT, lck_mtx_ext_t, KT_PRIV_ACCT);
198 
199 #pragma GCC visibility push(hidden)
200 /*
201  * atomic exchange API is a low level abstraction of the operations
202  * to atomically read, modify, and write a pointer.  This abstraction works
203  * for both Intel and ARMv8.1 compare and exchange atomic instructions as
204  * well as the ARM exclusive instructions.
205  *
206  * atomic_exchange_begin() - begin exchange and retrieve current value
207  * atomic_exchange_complete() - conclude an exchange
208  * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
209  */
210 uint32_t
211 load_exclusive32(uint32_t *target, enum memory_order ord)
212 {
213 	uint32_t        value;
214 
215 #if __arm__
216 	if (_os_atomic_mo_has_release(ord)) {
217 		// Pre-load release barrier
218 		atomic_thread_fence(memory_order_release);
219 	}
220 	value = __builtin_arm_ldrex(target);
221 #else
222 	if (_os_atomic_mo_has_acquire(ord)) {
223 		value = __builtin_arm_ldaex(target);    // ldaxr
224 	} else {
225 		value = __builtin_arm_ldrex(target);    // ldxr
226 	}
227 #endif  // __arm__
228 	return value;
229 }
230 
231 boolean_t
232 store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
233 {
234 	boolean_t err;
235 
236 #if __arm__
237 	err = __builtin_arm_strex(value, target);
238 	if (_os_atomic_mo_has_acquire(ord)) {
239 		// Post-store acquire barrier
240 		atomic_thread_fence(memory_order_acquire);
241 	}
242 #else
243 	if (_os_atomic_mo_has_release(ord)) {
244 		err = __builtin_arm_stlex(value, target);       // stlxr
245 	} else {
246 		err = __builtin_arm_strex(value, target);       // stxr
247 	}
248 #endif  // __arm__
249 	return !err;
250 }
251 
252 uint32_t
253 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
254 {
255 	uint32_t        val;
256 
257 #if __ARM_ATOMICS_8_1
258 	ord = memory_order_relaxed;
259 #endif
260 	val = load_exclusive32(target, ord);
261 	*previous = val;
262 	return val;
263 }
264 
265 boolean_t
266 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
267 {
268 #if __ARM_ATOMICS_8_1
269 	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
270 #else
271 	(void)previous;         // Previous not needed, monitor is held
272 	return store_exclusive32(target, newval, ord);
273 #endif
274 }
275 
276 void
277 atomic_exchange_abort(void)
278 {
279 	os_atomic_clear_exclusive();
280 }
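/*
 * Editorial sketch (not part of the original source): a typical caller drives
 * the exchange API in a retry loop, much like atomic_test_and_set32() below.
 * The variable `flags' and the bit `MY_BIT' are hypothetical.
 *
 *	uint32_t val, prev;
 *	for (;;) {
 *		val = atomic_exchange_begin32(&flags, &prev, memory_order_acquire);
 *		if (val & MY_BIT) {
 *			atomic_exchange_abort();        // drop the exclusive monitor
 *			break;                          // or wait_for_event() and retry
 *		}
 *		val |= MY_BIT;
 *		if (atomic_exchange_complete32(&flags, prev, val, memory_order_acquire)) {
 *			break;                          // exchange committed
 *		}
 *		// store-exclusive failed or the value changed; re-read and retry
 *	}
 */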
281 
282 boolean_t
283 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
284 {
285 	uint32_t                value, prev;
286 
287 	for (;;) {
288 		value = atomic_exchange_begin32(target, &prev, ord);
289 		if (value & test_mask) {
290 			if (wait) {
291 				wait_for_event();       // Wait with monitor held
292 			} else {
293 				atomic_exchange_abort();        // Clear exclusive monitor
294 			}
295 			return FALSE;
296 		}
297 		value |= set_mask;
298 		if (atomic_exchange_complete32(target, prev, value, ord)) {
299 			return TRUE;
300 		}
301 	}
302 }
303 
304 #pragma GCC visibility pop
305 
306 #if SCHED_PREEMPTION_DISABLE_DEBUG
307 
308 uint64_t PERCPU_DATA(preemption_disable_max_mt);
309 
310 MACHINE_TIMEOUT_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);
311 
312 TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
313     "machine-timeouts",
314     "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
315     "sched_preemption_disable_debug_mode",
316     SCHED_HYGIENE_MODE_OFF,
317     TUNABLE_DT_CHECK_CHOSEN);
318 
319 static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;
320 
321 NOINLINE void
322 _prepare_preemption_disable_measurement(thread_t thread)
323 {
324 	if (thread->machine.inthandler_timestamp == 0) {
325 		/*
326 		 * Only prepare a measurement if not currently in an interrupt
327 		 * handler.
328 		 *
329 		 * We are only interested in the net duration of disabled
330 		 * preemption, that is: The time in which preemption was
331 		 * disabled, minus the intervals in which any (likely
332 		 * unrelated) interrupts were handled.
333 		 * ml_adjust_preemption_disable_time() will remove those
334 		 * intervals, however we also do not even start measuring
335 		 * preemption disablement if we are already within handling of
336 		 * an interrupt when preemption was disabled (the resulting
337 		 * net time would be 0).
338 		 *
339 		 * Interrupt handling duration is handled separately, and any
340 		 * long intervals of preemption disablement are counted
341 		 * towards that.
342 		 */
343 		thread->machine.preemption_disable_adj_mt = ml_get_speculative_timebase();
344 	}
345 }
346 
347 NOINLINE void
348 _collect_preemption_disable_measurement(thread_t thread)
349 {
350 	bool istate = ml_set_interrupts_enabled(false);
351 	/*
352 	 * Collect start time and current time with interrupts disabled.
353 	 * Otherwise an interrupt coming in after grabbing the timestamp
354 	 * could spuriously inflate the measurement, because it will
355 	 * adjust preemption_disable_adj_mt only after we already grabbed
356 	 * it.
357 	 *
358 	 * (Even worse if we collected the current time first: Then a
359 	 * subsequent interrupt could adjust preemption_disable_adj_mt to
360 	 * make the duration go negative after subtracting the already
361 	 * grabbed time. With interrupts disabled we don't care much about
362 	 * the order.)
363 	 */
364 
365 	uint64_t const mt = thread->machine.preemption_disable_adj_mt;
366 	uint64_t const now = ml_get_speculative_timebase();
367 
368 	os_compiler_barrier(acq_rel);
369 
370 	ml_set_interrupts_enabled(istate);
371 
372 	int64_t const duration = now - mt;
373 
374 
375 	uint64_t * const max_duration = PERCPU_GET(preemption_disable_max_mt);
376 
377 	if (__improbable(duration > *max_duration)) {
378 		*max_duration = duration;
379 	}
380 
381 	uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
382 	if (__improbable(threshold > 0 && duration >= threshold)) {
383 		if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
384 			panic("preemption disable timeout exceeded: %llu >= %llu timebase ticks", duration, threshold);
385 		}
386 
387 		DTRACE_SCHED1(mach_preemption_expired, uint64_t, duration);
388 		if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
389 			KDBG(sched_preemption_disable_debug_dbgid, duration);
390 		}
391 	}
392 
393 	thread->machine.preemption_disable_adj_mt = 0;
394 }
395 
396 /*
397  * Skip predicate for sched_preemption_disable, which would trigger
398  * spuriously when kprintf spam is enabled.
399  */
400 bool
401 kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
402 {
403 	bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
404 	return kprintf_spam_enabled;
405 }
406 
407 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
408 
409 /*
410  * To help _disable_preemption() inline everywhere with LTO,
411  * we keep these nice non inlineable functions as the panic()
412  * codegen setup is quite large and for weird reasons causes a frame.
413  */
414 __abortlike
415 static void
416 _disable_preemption_overflow(void)
417 {
418 	panic("Preemption count overflow");
419 }
420 
421 void
422 _disable_preemption(void)
423 {
424 	thread_t     thread = current_thread();
425 	unsigned int count  = thread->machine.preemption_count;
426 
427 	if (__improbable(++count == 0)) {
428 		_disable_preemption_overflow();
429 	}
430 
431 	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
432 
433 #if SCHED_PREEMPTION_DISABLE_DEBUG
434 
435 	/*
436 	 * Note that this is not the only place preemption gets disabled,
437 	 * it also gets modified on ISR and PPL entry/exit. Both of those
438 	 * events will be treated specially however, and
439 	 * increment/decrement being paired around their entry/exit means
440 	 * that collection here is not desynced otherwise.
441 	 */
442 
443 	if (count == 1 && sched_preemption_disable_debug_mode) {
444 		_prepare_preemption_disable_measurement(thread);
445 	}
446 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
447 }
448 
449 /*
450  * This variant of _disable_preemption() allows disabling preemption
451  * without taking measurements (and later potentially triggering
452  * actions on those).
453  *
454  * We do this through a separate variant because we do not want to
455  * disturb inlinability of _disable_preemption(). However, in order to
456  * also avoid code duplication, instead of repeating common code we
457  * simply call _disable_preemption() and explicitly abandon any taken
458  * measurement.
459  */
460 void
461 _disable_preemption_without_measurements(void)
462 {
463 	_disable_preemption();
464 
465 #if SCHED_PREEMPTION_DISABLE_DEBUG
466 	/*
467 	 * Abandon a potential preemption disable measurement. Useful for
468 	 * example for the idle thread, which would just spuriously
469 	 * trigger the threshold while actually idling, which we don't
470 	 * care about.
471 	 */
472 	thread_t t = current_thread();
473 	if (t->machine.preemption_disable_adj_mt != 0) {
474 		t->machine.preemption_disable_adj_mt = 0;
475 	}
476 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
477 }
478 
479 /*
480  * This function checks whether an AST_URGENT has been pended.
481  *
482  * It is called once the preemption has been reenabled, which means the thread
483  * may have been preempted right before this was called, and when this function
484  * actually performs the check, we've changed CPU.
485  *
486  * This race is however benign: the point of AST_URGENT is to trigger a context
487  * switch, so if one happened, there's nothing left to check for, and AST_URGENT
488  * was cleared in the process.
489  *
490  * It follows that this check cannot have false negatives, which allows us
491  * to avoid fiddling with interrupt state for the vast majority of cases
492  * when the check will actually be negative.
493  */
494 static NOINLINE void
495 kernel_preempt_check(thread_t thread)
496 {
497 	cpu_data_t *cpu_data_ptr;
498 	long        state;
499 
500 #if __arm__
501 #define INTERRUPT_MASK PSR_IRQF
502 #else   // __arm__
503 #define INTERRUPT_MASK DAIF_IRQF
504 #endif  // __arm__
505 
506 	/*
507 	 * This check is racy and could load from another CPU's pending_ast mask,
508 	 * but as described above, this can't have false negatives.
509 	 */
510 	cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
511 	if (__probable((cpu_data_ptr->cpu_pending_ast & AST_URGENT) == 0)) {
512 		return;
513 	}
514 
515 	/* If interrupts are masked, we can't take an AST here */
516 	state = get_interrupts();
517 	if ((state & INTERRUPT_MASK) == 0) {
518 		disable_interrupts_noread();                    // Disable interrupts
519 
520 		/*
521 		 * Reload cpu_data_ptr: a context switch would cause it to change.
522 		 * Now that interrupts are disabled, this will debounce false positives.
523 		 */
524 		cpu_data_ptr = os_atomic_load(&thread->machine.CpuDatap, compiler_acq_rel);
525 		if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
526 #if __arm__
527 #if __ARM_USER_PROTECT__
528 			uintptr_t up = arm_user_protect_begin(thread);
529 #endif  // __ARM_USER_PROTECT__
530 			enable_fiq();
531 #endif  // __arm__
532 			ast_taken_kernel();                 // Handle urgent AST
533 #if __arm__
534 #if __ARM_USER_PROTECT__
535 			arm_user_protect_end(thread, up, TRUE);
536 #endif  // __ARM_USER_PROTECT__
537 			enable_interrupts();
538 			return;                             // Return early on arm only due to FIQ enabling
539 #endif  // __arm__
540 		}
541 		restore_interrupts(state);              // Enable interrupts
542 	}
543 }
544 
545 /*
546  * To help _enable_preemption() inline everywhere with LTO,
547  * we keep these nice non inlineable functions as the panic()
548  * codegen setup is quite large and for weird reasons causes a frame.
549  */
550 __abortlike
551 static void
552 _enable_preemption_underflow(void)
553 {
554 	panic("Preemption count underflow");
555 }
556 
557 void
558 _enable_preemption(void)
559 {
560 	thread_t     thread = current_thread();
561 	unsigned int count  = thread->machine.preemption_count;
562 
563 	if (__improbable(count == 0)) {
564 		_enable_preemption_underflow();
565 	}
566 	count -= 1;
567 
568 #if SCHED_PREEMPTION_DISABLE_DEBUG
569 	if (count == 0 && thread->machine.preemption_disable_adj_mt != 0) {
570 		_collect_preemption_disable_measurement(thread);
571 	}
572 #endif /* SCHED_PREEMPTION_DISABLE_DEBUG */
573 
574 	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
575 	if (count == 0) {
576 		kernel_preempt_check(thread);
577 	}
578 
579 	os_compiler_barrier();
580 }
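/*
 * Editorial sketch (not part of the original source): preemption disable and
 * enable calls nest per thread, so only the outermost _enable_preemption()
 * (the one that drops the count back to zero) runs kernel_preempt_check().
 *
 *	_disable_preemption();  // count 0 -> 1
 *	_disable_preemption();  // count 1 -> 2 (nested)
 *	_enable_preemption();   // count 2 -> 1, no AST check
 *	_enable_preemption();   // count 1 -> 0, kernel_preempt_check() runs
 */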
581 
582 int
583 get_preemption_level(void)
584 {
585 	return current_thread()->machine.preemption_count;
586 }
587 
588 /*
589  *      Routine:        lck_spin_alloc_init
590  */
591 lck_spin_t     *
592 lck_spin_alloc_init(
593 	lck_grp_t * grp,
594 	lck_attr_t * attr)
595 {
596 	lck_spin_t *lck;
597 
598 	lck = zalloc(KT_LCK_SPIN);
599 	lck_spin_init(lck, grp, attr);
600 	return lck;
601 }
602 
603 /*
604  *      Routine:        lck_spin_free
605  */
606 void
607 lck_spin_free(
608 	lck_spin_t * lck,
609 	lck_grp_t * grp)
610 {
611 	lck_spin_destroy(lck, grp);
612 	zfree(KT_LCK_SPIN, lck);
613 }
614 
615 /*
616  *      Routine:        lck_spin_init
617  */
618 void
619 lck_spin_init(
620 	lck_spin_t * lck,
621 	lck_grp_t * grp,
622 	__unused lck_attr_t * attr)
623 {
624 	lck->type = LCK_SPIN_TYPE;
625 	hw_lock_init(&lck->hwlock);
626 	if (grp) {
627 		lck_grp_reference(grp);
628 		lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
629 	}
630 }
631 
632 /*
633  * arm_usimple_lock is a lck_spin_t without a group or attributes
634  */
635 MARK_AS_HIBERNATE_TEXT void inline
636 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
637 {
638 	lck->type = LCK_SPIN_TYPE;
639 	hw_lock_init(&lck->hwlock);
640 }
641 
642 
643 /*
644  *      Routine:        lck_spin_lock
645  */
646 void
647 lck_spin_lock(lck_spin_t *lock)
648 {
649 #if     DEVELOPMENT || DEBUG
650 	if (lock->type != LCK_SPIN_TYPE) {
651 		panic("Invalid spinlock %p", lock);
652 	}
653 #endif  // DEVELOPMENT || DEBUG
654 	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
655 }
656 
657 void
658 lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
659 {
660 #pragma unused(grp)
661 #if     DEVELOPMENT || DEBUG
662 	if (lock->type != LCK_SPIN_TYPE) {
663 		panic("Invalid spinlock %p", lock);
664 	}
665 #endif  // DEVELOPMENT || DEBUG
666 	hw_lock_lock(&lock->hwlock, grp);
667 }
668 
669 /*
670  *      Routine:        lck_spin_lock_nopreempt
671  */
672 void
673 lck_spin_lock_nopreempt(lck_spin_t *lock)
674 {
675 #if     DEVELOPMENT || DEBUG
676 	if (lock->type != LCK_SPIN_TYPE) {
677 		panic("Invalid spinlock %p", lock);
678 	}
679 #endif  // DEVELOPMENT || DEBUG
680 	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
681 }
682 
683 void
684 lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
685 {
686 #pragma unused(grp)
687 #if     DEVELOPMENT || DEBUG
688 	if (lock->type != LCK_SPIN_TYPE) {
689 		panic("Invalid spinlock %p", lock);
690 	}
691 #endif  // DEVELOPMENT || DEBUG
692 	hw_lock_lock_nopreempt(&lock->hwlock, grp);
693 }
694 
695 /*
696  *      Routine:        lck_spin_try_lock
697  */
698 int
699 lck_spin_try_lock(lck_spin_t *lock)
700 {
701 	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
702 }
703 
704 int
705 lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
706 {
707 #pragma unused(grp)
708 	return hw_lock_try(&lock->hwlock, grp);
709 }
710 
711 /*
712  *      Routine:        lck_spin_try_lock_nopreempt
713  */
714 int
715 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
716 {
717 	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
718 }
719 
720 int
721 lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
722 {
723 #pragma unused(grp)
724 	return hw_lock_try_nopreempt(&lock->hwlock, grp);
725 }
726 
727 /*
728  *      Routine:        lck_spin_unlock
729  */
730 void
731 lck_spin_unlock(lck_spin_t *lock)
732 {
733 #if     DEVELOPMENT || DEBUG
734 	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
735 		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
736 	}
737 	if (lock->type != LCK_SPIN_TYPE) {
738 		panic("Invalid spinlock type %p", lock);
739 	}
740 #endif  // DEVELOPMENT || DEBUG
741 	hw_lock_unlock(&lock->hwlock);
742 }
743 
744 /*
745  *      Routine:        lck_spin_unlock_nopreempt
746  */
747 void
748 lck_spin_unlock_nopreempt(lck_spin_t *lock)
749 {
750 #if     DEVELOPMENT || DEBUG
751 	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) {
752 		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
753 	}
754 	if (lock->type != LCK_SPIN_TYPE) {
755 		panic("Invalid spinlock type %p", lock);
756 	}
757 #endif  // DEVELOPMENT || DEBUG
758 	hw_lock_unlock_nopreempt(&lock->hwlock);
759 }
760 
761 /*
762  *      Routine:        lck_spin_destroy
763  */
764 void
765 lck_spin_destroy(
766 	lck_spin_t * lck,
767 	lck_grp_t * grp)
768 {
769 	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
770 		return;
771 	}
772 	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
773 	if (grp) {
774 		lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
775 		lck_grp_deallocate(grp);
776 	}
777 }
778 
779 /*
780  * Routine: kdp_lck_spin_is_acquired
781  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
782  */
783 boolean_t
784 kdp_lck_spin_is_acquired(lck_spin_t *lck)
785 {
786 	if (not_in_kdp) {
787 		panic("panic: spinlock acquired check done outside of kernel debugger");
788 	}
789 	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
790 }
791 
792 /*
793  *	Initialize a usimple_lock.
794  *
795  *	No change in preemption state.
796  */
797 void
798 usimple_lock_init(
799 	usimple_lock_t l,
800 	unsigned short tag)
801 {
802 	simple_lock_init((simple_lock_t) l, tag);
803 }
804 
805 
806 /*
807  *	Acquire a usimple_lock.
808  *
809  *	Returns with preemption disabled.  Note
810  *	that the hw_lock routines are responsible for
811  *	maintaining preemption state.
812  */
813 void
814 (usimple_lock)(
815 	usimple_lock_t l
816 	LCK_GRP_ARG(lck_grp_t *grp))
817 {
818 	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
819 }
820 
821 
822 extern void     sync(void);
823 
824 /*
825  *	Release a usimple_lock.
826  *
827  *	Returns with preemption enabled.  Note
828  *	that the hw_lock routines are responsible for
829  *	maintaining preemption state.
830  */
831 void
832 (usimple_unlock)(
833 	usimple_lock_t l)
834 {
835 	simple_unlock((simple_lock_t)l);
836 }
837 
838 
839 /*
840  *	Conditionally acquire a usimple_lock.
841  *
842  *	On success, returns with preemption disabled.
843  *	On failure, returns with preemption in the same state
844  *	as when first invoked.  Note that the hw_lock routines
845  *	are responsible for maintaining preemption state.
846  *
847  *	XXX No stats are gathered on a miss; I preserved this
848  *	behavior from the original assembly-language code, but
849  *	doesn't it make sense to log misses?  XXX
850  */
851 unsigned
852 int
853 (usimple_lock_try)(
854 	usimple_lock_t l
855 	LCK_GRP_ARG(lck_grp_t *grp))
856 {
857 	return simple_lock_try((simple_lock_t) l, grp);
858 }
859 
860 /*
861  * The C portion of the mutex package.  These routines are only invoked
862  * if the optimized assembler routines can't do the work.
863  */
864 
865 /*
866  * Forward declaration
867  */
868 
869 void
870 lck_mtx_ext_init(
871 	lck_mtx_ext_t * lck,
872 	lck_grp_t * grp,
873 	lck_attr_t * attr);
874 
875 /*
876  *      Routine:        lck_mtx_alloc_init
877  */
878 lck_mtx_t      *
879 lck_mtx_alloc_init(
880 	lck_grp_t * grp,
881 	lck_attr_t * attr)
882 {
883 	lck_mtx_t      *lck;
884 
885 	lck = zalloc(KT_LCK_MTX);
886 	lck_mtx_init(lck, grp, attr);
887 	return lck;
888 }
889 
890 /*
891  *      Routine:        lck_mtx_free
892  */
893 void
894 lck_mtx_free(
895 	lck_mtx_t * lck,
896 	lck_grp_t * grp)
897 {
898 	lck_mtx_destroy(lck, grp);
899 	zfree(KT_LCK_MTX, lck);
900 }
901 
902 /*
903  *      Routine:        lck_mtx_init
904  */
905 void
906 lck_mtx_init(
907 	lck_mtx_t * lck,
908 	lck_grp_t * grp,
909 	lck_attr_t * attr)
910 {
911 #ifdef  BER_XXX
912 	lck_mtx_ext_t  *lck_ext;
913 #endif
914 	lck_attr_t     *lck_attr;
915 
916 	if (attr != LCK_ATTR_NULL) {
917 		lck_attr = attr;
918 	} else {
919 		lck_attr = &LockDefaultLckAttr;
920 	}
921 
922 #ifdef  BER_XXX
923 	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
924 		lck_ext = zalloc(KT_LCK_MTX_EXT);
925 		lck_mtx_ext_init(lck_ext, grp, lck_attr);
926 		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
927 		lck->lck_mtx_ptr = lck_ext;
928 		lck->lck_mtx_type = LCK_MTX_TYPE;
929 	} else
930 #endif
931 	{
932 		*lck = (lck_mtx_t){
933 			.lck_mtx_type = LCK_MTX_TYPE,
934 		};
935 	}
936 	lck_grp_reference(grp);
937 	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
938 }
939 
940 /*
941  *      Routine:        lck_mtx_init_ext
942  */
943 void
944 lck_mtx_init_ext(
945 	lck_mtx_t * lck,
946 	lck_mtx_ext_t * lck_ext __unused,
947 	lck_grp_t * grp,
948 	lck_attr_t * attr)
949 {
950 	lck_attr_t     *lck_attr;
951 
952 	if (attr != LCK_ATTR_NULL) {
953 		lck_attr = attr;
954 	} else {
955 		lck_attr = &LockDefaultLckAttr;
956 	}
957 
958 #if LOCKS_INDIRECT_ALLOW
959 	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
960 		lck_mtx_ext_init(lck_ext, grp, lck_attr);
961 		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
962 		lck->lck_mtx_ptr = lck_ext;
963 		lck->lck_mtx_type = LCK_MTX_TYPE;
964 	} else
965 #endif /* LOCKS_INDIRECT_ALLOW */
966 	{
967 		lck->lck_mtx_waiters = 0;
968 		lck->lck_mtx_type = LCK_MTX_TYPE;
969 		ordered_store_mtx(lck, 0);
970 	}
971 	lck_grp_reference(grp);
972 	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
973 }
974 
975 /*
976  *      Routine:        lck_mtx_ext_init
977  */
978 void
979 lck_mtx_ext_init(
980 	lck_mtx_ext_t * lck,
981 	lck_grp_t * grp,
982 	lck_attr_t * attr)
983 {
984 	bzero((void *) lck, sizeof(lck_mtx_ext_t));
985 
986 	lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
987 
988 	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
989 		lck->lck_mtx_deb.type = MUTEX_TAG;
990 		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
991 	}
992 	lck->lck_mtx_grp = grp;
993 
994 	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
995 		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
996 	}
997 }
998 
999 /* The slow versions */
1000 static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
1001 static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
1002 static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
1003 
1004 /* The adaptive spin function */
1005 static spinwait_result_t lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
1006 
1007 /*
1008  *	Routine:	lck_mtx_verify
1009  *
1010  *	Verify if a mutex is valid
1011  */
1012 static inline void
1013 lck_mtx_verify(lck_mtx_t *lock)
1014 {
1015 	if (lock->lck_mtx_type != LCK_MTX_TYPE) {
1016 		panic("Invalid mutex %p", lock);
1017 	}
1018 #if     DEVELOPMENT || DEBUG
1019 	if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
1020 		panic("Mutex destroyed %p", lock);
1021 	}
1022 #endif  /* DEVELOPMENT || DEBUG */
1023 }
1024 
1025 /*
1026  *	Routine:	lck_mtx_check_preemption
1027  *
1028  *	Verify preemption is enabled when attempting to acquire a mutex.
1029  */
1030 
1031 static inline void
1032 lck_mtx_check_preemption(lck_mtx_t *lock)
1033 {
1034 #if     DEVELOPMENT || DEBUG
1035 	if (current_cpu_datap()->cpu_hibernate) {
1036 		return;
1037 	}
1038 
1039 	int pl = get_preemption_level();
1040 
1041 	if (pl != 0) {
1042 		panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
1043 	}
1044 #else
1045 	(void)lock;
1046 #endif
1047 }
1048 
1049 /*
1050  *	Routine:	lck_mtx_lock
1051  */
1052 void
1053 lck_mtx_lock(lck_mtx_t *lock)
1054 {
1055 	thread_t        thread;
1056 
1057 	lck_mtx_verify(lock);
1058 	lck_mtx_check_preemption(lock);
1059 	thread = current_thread();
1060 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
1061 	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
1062 #if     CONFIG_DTRACE
1063 		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
1064 #endif /* CONFIG_DTRACE */
1065 		return;
1066 	}
1067 	lck_mtx_lock_contended(lock, thread, FALSE);
1068 }
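/*
 * Editorial sketch (not part of the original source): on the uncontended fast
 * path above, lck_mtx_data goes from 0 to the owner thread pointer with a
 * single acquire cmpxchg, and lck_mtx_unlock() reverses it with a release
 * cmpxchg, so an uncontended critical section costs one atomic on each side.
 * `my_lock' is a hypothetical lck_mtx_t initialized with lck_mtx_init().
 *
 *	lck_mtx_lock(&my_lock);         // cmpxchg 0 -> current_thread()
 *	// ... critical section ...
 *	lck_mtx_unlock(&my_lock);       // cmpxchg current_thread() -> 0
 */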
1069 
1070 /*
1071  *       This is the slow version of mutex locking.
1072  */
1073 static void NOINLINE
1074 lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
1075 {
1076 	thread_t                holding_thread;
1077 	uintptr_t               state;
1078 	int                     waiters = 0;
1079 	spinwait_result_t       sw_res;
1080 	struct turnstile        *ts = NULL;
1081 
1082 	/* Loop waiting until I see that the mutex is unowned */
1083 	for (;;) {
1084 		sw_res = lck_mtx_lock_contended_spinwait_arm(lock, thread, interlocked);
1085 		interlocked = FALSE;
1086 
1087 		switch (sw_res) {
1088 		case SPINWAIT_ACQUIRED:
1089 			if (ts != NULL) {
1090 				interlock_lock(lock);
1091 				turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1092 				interlock_unlock(lock);
1093 			}
1094 			goto done;
1095 		case SPINWAIT_INTERLOCK:
1096 			goto set_owner;
1097 		default:
1098 			break;
1099 		}
1100 
1101 		state = ordered_load_mtx(lock);
1102 		holding_thread = LCK_MTX_STATE_TO_THREAD(state);
1103 		if (holding_thread == NULL) {
1104 			break;
1105 		}
1106 		ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
1107 		lck_mtx_lock_wait(lock, holding_thread, &ts);
1108 		/* returns interlock unlocked */
1109 	}
1110 
1111 set_owner:
1112 	/* Hooray, I'm the new owner! */
1113 	state = ordered_load_mtx(lock);
1114 
1115 	if (state & ARM_LCK_WAITERS) {
1116 		/* Skip lck_mtx_lock_acquire if there are no waiters. */
1117 		waiters = lck_mtx_lock_acquire(lock, ts);
1118 		/*
1119 		 * lck_mtx_lock_acquire will call
1120 		 * turnstile_complete
1121 		 */
1122 	} else {
1123 		if (ts != NULL) {
1124 			turnstile_complete((uintptr_t)lock, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1125 		}
1126 	}
1127 
1128 	state = LCK_MTX_THREAD_TO_STATE(thread);
1129 	if (waiters != 0) {
1130 		state |= ARM_LCK_WAITERS;
1131 	}
1132 	state |= LCK_ILOCK;                             // Preserve interlock
1133 	ordered_store_mtx(lock, state); // Set ownership
1134 	interlock_unlock(lock);                 // Release interlock, enable preemption
1135 
1136 done:
1137 	load_memory_barrier();
1138 
1139 	assert(thread->turnstile != NULL);
1140 
1141 	if (ts != NULL) {
1142 		turnstile_cleanup();
1143 	}
1144 
1145 #if CONFIG_DTRACE
1146 	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
1147 #endif /* CONFIG_DTRACE */
1148 }
1149 
1150 /*
1151  * Routine: lck_mtx_lock_contended_spinwait_arm
1152  *
1153  * Invoked trying to acquire a mutex when there is contention but
1154  * the holder is running on another processor. We spin for up to a maximum
1155  * time waiting for the lock to be released.
1156  */
1157 static spinwait_result_t
1158 lck_mtx_lock_contended_spinwait_arm(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
1159 {
1160 	int                     has_interlock = (int)interlocked;
1161 	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1162 	thread_t        owner, prev_owner;
1163 	uint64_t        window_deadline, sliding_deadline, high_deadline;
1164 	uint64_t        start_time, cur_time, avg_hold_time, bias, delta;
1165 	int             loopcount = 0;
1166 	uint            i, prev_owner_cpu;
1167 	int             total_hold_time_samples, window_hold_time_samples, unfairness;
1168 	bool            owner_on_core, adjust;
1169 	uintptr_t       state, new_state, waiters;
1170 	spinwait_result_t       retval = SPINWAIT_DID_SPIN_HIGH_THR;
1171 
1172 	if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
1173 		if (!has_interlock) {
1174 			interlock_lock(lock);
1175 		}
1176 
1177 		return SPINWAIT_DID_NOT_SPIN;
1178 	}
1179 
1180 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
1181 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
1182 
1183 	start_time = mach_absolute_time();
1184 	/*
1185 	 * window_deadline represents the "learning" phase.
1186 	 * The thread collects statistics about the lock during
1187 	 * window_deadline and then it makes a decision on whether to spin more
1188 	 * or block according to the concurrency behavior
1189 	 * observed.
1190 	 *
1191 	 * Every thread can spin at least low_MutexSpin.
1192 	 */
1193 	window_deadline = start_time + low_MutexSpin;
1194 	/*
1195 	 * Sliding_deadline is the adjusted spin deadline
1196 	 * computed after the "learning" phase.
1197 	 */
1198 	sliding_deadline = window_deadline;
1199 	/*
1200 	 * High_deadline is a hard deadline. No thread
1201 	 * can spin more than this deadline.
1202 	 */
1203 	if (high_MutexSpin >= 0) {
1204 		high_deadline = start_time + high_MutexSpin;
1205 	} else {
1206 		high_deadline = start_time + low_MutexSpin * real_ncpus;
1207 	}
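	/*
	 * Editorial example (numbers are hypothetical, not from the original
	 * source): with low_MutexSpin = 1000 ticks, high_MutexSpin < 0 and
	 * real_ncpus = 4, the learning window ends at start_time + 1000,
	 * sliding_deadline starts out equal to it, and the hard cap is
	 * start_time + 1000 * 4 = start_time + 4000 ticks.
	 */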
1208 
1209 	/*
1210 	 * We do not yet know which cpu the owner is running on.
1211 	 * Initialize prev_owner_cpu with the next cpu.
1212 	 */
1213 	prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
1214 	total_hold_time_samples = 0;
1215 	window_hold_time_samples = 0;
1216 	avg_hold_time = 0;
1217 	adjust = TRUE;
1218 	bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
1219 
1220 	/* Snoop the lock state */
1221 	state = ordered_load_mtx(lock);
1222 	owner = LCK_MTX_STATE_TO_THREAD(state);
1223 	prev_owner = owner;
1224 
1225 	if (has_interlock) {
1226 		if (owner == NULL) {
1227 			retval = SPINWAIT_INTERLOCK;
1228 			goto done_spinning;
1229 		} else {
1230 			/*
1231 			 * We are holding the interlock, so
1232 			 * we can safely dereference owner.
1233 			 */
1234 			if (!machine_thread_on_core(owner) || (owner->state & TH_IDLE)) {
1235 				retval = SPINWAIT_DID_NOT_SPIN;
1236 				goto done_spinning;
1237 			}
1238 		}
1239 		interlock_unlock(lock);
1240 		has_interlock = 0;
1241 	}
1242 
1243 	/*
1244 	 * Spin while:
1245 	 *   - mutex is locked, and
1246 	 *   - it's locked as a spin lock, and
1247 	 *   - owner is running on another processor, and
1248 	 *   - we haven't spun for long enough.
1249 	 */
1250 	do {
1251 		/*
1252 		 * Try to acquire the lock.
1253 		 */
1254 		owner = LCK_MTX_STATE_TO_THREAD(state);
1255 		if (owner == NULL) {
1256 			waiters = state & ARM_LCK_WAITERS;
1257 			if (waiters) {
1258 				/*
1259 				 * preserve the waiter bit
1260 				 * and try acquire the interlock.
1261 				 * Note: we will successfully acquire
1262 				 * the interlock only if we can also
1263 				 * acquire the lock.
1264 				 */
1265 				new_state = ARM_LCK_WAITERS | LCK_ILOCK;
1266 				has_interlock = 1;
1267 				retval = SPINWAIT_INTERLOCK;
1268 				disable_preemption();
1269 			} else {
1270 				new_state = LCK_MTX_THREAD_TO_STATE(thread);
1271 				retval = SPINWAIT_ACQUIRED;
1272 			}
1273 
1274 			/*
1275 			 * The cmpxchg will succeed only if the lock
1276 			 * is not owned (doesn't have an owner set)
1277 			 * and it is not interlocked.
1278 			 * It will not fail if there are waiters.
1279 			 */
1280 			if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
1281 			    waiters, new_state, &state, acquire)) {
1282 				goto done_spinning;
1283 			} else {
1284 				if (waiters) {
1285 					has_interlock = 0;
1286 					enable_preemption();
1287 				}
1288 			}
1289 		}
1290 
1291 		cur_time = mach_absolute_time();
1292 
1293 		/*
1294 		 * Never spin past high_deadline.
1295 		 */
1296 		if (cur_time >= high_deadline) {
1297 			retval = SPINWAIT_DID_SPIN_HIGH_THR;
1298 			break;
1299 		}
1300 
1301 		/*
1302 		 * Check if owner is on core. If not block.
1303 		 */
1304 		owner = LCK_MTX_STATE_TO_THREAD(state);
1305 		if (owner) {
1306 			i = prev_owner_cpu;
1307 			owner_on_core = FALSE;
1308 
1309 			disable_preemption();
1310 			state = ordered_load_mtx(lock);
1311 			owner = LCK_MTX_STATE_TO_THREAD(state);
1312 
1313 			/*
1314 			 * For scalability we want to check if the owner is on core
1315 			 * without locking the mutex interlock.
1316 			 * If we do not lock the mutex interlock, the owner that we see might be
1317 			 * invalid, so we cannot dereference it. Therefore we cannot check
1318 			 * any field of the thread to tell us if it is on core.
1319 			 * Check if the thread that is running on the other cpus matches the owner.
1320 			 */
1321 			if (owner) {
1322 				do {
1323 					cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
1324 					if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
1325 						owner_on_core = TRUE;
1326 						break;
1327 					}
1328 					if (++i >= real_ncpus) {
1329 						i = 0;
1330 					}
1331 				} while (i != prev_owner_cpu);
1332 				enable_preemption();
1333 
1334 				if (owner_on_core) {
1335 					prev_owner_cpu = i;
1336 				} else {
1337 					prev_owner = owner;
1338 					state = ordered_load_mtx(lock);
1339 					owner = LCK_MTX_STATE_TO_THREAD(state);
1340 					if (owner == prev_owner) {
1341 						/*
1342 						 * Owner is not on core.
1343 						 * Stop spinning.
1344 						 */
1345 						if (loopcount == 0) {
1346 							retval = SPINWAIT_DID_NOT_SPIN;
1347 						} else {
1348 							retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
1349 						}
1350 						break;
1351 					}
1352 					/*
1353 					 * Fall through if the owner changed while we were scanning.
1354 					 * The new owner could potentially be on core, so loop
1355 					 * again.
1356 					 */
1357 				}
1358 			} else {
1359 				enable_preemption();
1360 			}
1361 		}
1362 
1363 		/*
1364 		 * Save how many times we see the owner changing.
1365 		 * We can roughly estimate the mutex hold
1366 		 * time and the fairness with that.
1367 		 */
1368 		if (owner != prev_owner) {
1369 			prev_owner = owner;
1370 			total_hold_time_samples++;
1371 			window_hold_time_samples++;
1372 		}
1373 
1374 		/*
1375 		 * Learning window expired.
1376 		 * Try to adjust the sliding_deadline.
1377 		 */
1378 		if (cur_time >= window_deadline) {
1379 			/*
1380 			 * If there was no contention during the window,
1381 			 * stop spinning.
1382 			 */
1383 			if (window_hold_time_samples < 1) {
1384 				retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
1385 				break;
1386 			}
1387 
1388 			if (adjust) {
1389 				/*
1390 				 * For a fair lock, we'd wait for at most (NCPU-1) periods,
1391 				 * but the lock is unfair, so let's try to estimate by how much.
1392 				 */
1393 				unfairness = total_hold_time_samples / real_ncpus;
1394 
1395 				if (unfairness == 0) {
1396 					/*
1397 					 * We observed the owner changing `total_hold_time_samples` times which
1398 					 * let us estimate the average hold time of this mutex for the duration
1399 					 * of the spin time.
1400 					 * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
1401 					 *
1402 					 * In this case spin at max avg_hold_time * (real_ncpus - 1)
1403 					 */
1404 					delta = cur_time - start_time;
1405 					sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
1406 				} else {
1407 					/*
1408 					 * In this case at least one of the other cpus was able to get the lock twice
1409 					 * while I was spinning.
1410 					 * We could spin longer but it won't necessarily help if the system is unfair.
1411 					 * Try to randomize the wait to reduce contention.
1412 					 *
1413 					 * We compute how much time we could potentially spin
1414 					 * and distribute it over the cpus.
1415 					 *
1416 					 * bias is an integer between 0 and real_ncpus.
1417 					 * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
1418 					 */
1419 					delta = high_deadline - cur_time;
1420 					sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
1421 					adjust = FALSE;
1422 				}
1423 			}
1424 
1425 			window_deadline += low_MutexSpin;
1426 			window_hold_time_samples = 0;
1427 		}
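		/*
		 * Editorial worked example (numbers are hypothetical, not from the
		 * original source), assuming real_ncpus = 4: with 2 owner changes
		 * seen in the first 1200 ticks, unfairness = 2 / 4 = 0, so the
		 * sliding deadline becomes start_time + (1200 * 3) / 2 =
		 * start_time + 1800 ticks (about three average hold times).  With
		 * 8 owner changes instead, unfairness = 2; with bias = 1 and 2800
		 * ticks left before high_deadline, the sliding deadline becomes
		 * cur_time + (2800 * 1) / 4 = cur_time + 700 ticks.
		 */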
1428 
1429 		/*
1430 		 * Stop spinning if we are past
1431 		 * the adjusted deadline.
1432 		 */
1433 		if (cur_time >= sliding_deadline) {
1434 			retval = SPINWAIT_DID_SPIN_SLIDING_THR;
1435 			break;
1436 		}
1437 
1438 		/*
1439 		 * We want to arm the monitor for wfe,
1440 		 * so load exclusively the lock.
1441 		 *
1442 		 * NOTE:
1443 		 * we rely on the fact that wfe will
1444 		 * eventually return even if the cache line
1445 		 * is not modified. This way we will keep
1446 		 * looping and checking if the deadlines expired.
1447 		 */
1448 		state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
1449 		owner = LCK_MTX_STATE_TO_THREAD(state);
1450 		if (owner != NULL) {
1451 			wait_for_event();
1452 			state = ordered_load_mtx(lock);
1453 		} else {
1454 			atomic_exchange_abort();
1455 		}
1456 
1457 		loopcount++;
1458 	} while (TRUE);
1459 
1460 done_spinning:
1461 #if     CONFIG_DTRACE
1462 	/*
1463 	 * Note that we record a different probe id depending on whether
1464 	 * this is a direct or indirect mutex.  This allows us to
1465 	 * penalize only lock groups that have debug/stats enabled
1466 	 * with dtrace processing if desired.
1467 	 */
1468 #if LOCKS_INDIRECT_ALLOW
1469 	if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
1470 		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
1471 		    mach_absolute_time() - start_time);
1472 	} else
1473 #endif /* LOCKS_INDIRECT_ALLOW */
1474 	{
1475 		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
1476 		    mach_absolute_time() - start_time);
1477 	}
1478 	/* The lockstat acquire event is recorded by the caller. */
1479 #endif
1480 
1481 	state = ordered_load_mtx(lock);
1482 
1483 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
1484 	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, retval, 0);
1485 	if ((!has_interlock) && (retval != SPINWAIT_ACQUIRED)) {
1486 		/* We must own either the lock or the interlock on return. */
1487 		interlock_lock(lock);
1488 	}
1489 
1490 	return retval;
1491 }
1492 
1493 
1494 /*
1495  *	Common code for mutex locking as spinlock
1496  */
1497 static inline void
1498 lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
1499 {
1500 	uintptr_t       state;
1501 
1502 	interlock_lock(lock);
1503 	state = ordered_load_mtx(lock);
1504 	if (LCK_MTX_STATE_TO_THREAD(state)) {
1505 		if (allow_held_as_mutex) {
1506 			lck_mtx_lock_contended(lock, current_thread(), TRUE);
1507 		} else {
1508 			// "Always" variants can never block. If the lock is held and blocking is not allowed
1509 			// then someone is mixing always and non-always calls on the same lock, which is
1510 			// forbidden.
1511 			panic("Attempting to block on a lock taken as spin-always %p", lock);
1512 		}
1513 		return;
1514 	}
1515 	state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
1516 	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
1517 	ordered_store_mtx(lock, state);
1518 	load_memory_barrier();
1519 
1520 #if     CONFIG_DTRACE
1521 	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
1522 #endif /* CONFIG_DTRACE */
1523 }
1524 
1525 /*
1526  *	Routine:	lck_mtx_lock_spin
1527  */
1528 void
1529 lck_mtx_lock_spin(lck_mtx_t *lock)
1530 {
1531 	lck_mtx_check_preemption(lock);
1532 	lck_mtx_lock_spin_internal(lock, TRUE);
1533 }
1534 
1535 /*
1536  *	Routine:	lck_mtx_lock_spin_always
1537  */
1538 void
1539 lck_mtx_lock_spin_always(lck_mtx_t *lock)
1540 {
1541 	lck_mtx_lock_spin_internal(lock, FALSE);
1542 }
1543 
1544 /*
1545  *	Routine:	lck_mtx_try_lock
1546  */
1547 boolean_t
1548 lck_mtx_try_lock(lck_mtx_t *lock)
1549 {
1550 	thread_t        thread = current_thread();
1551 
1552 	lck_mtx_verify(lock);
1553 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
1554 	    0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
1555 #if     CONFIG_DTRACE
1556 		LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
1557 #endif /* CONFIG_DTRACE */
1558 		return TRUE;
1559 	}
1560 	return lck_mtx_try_lock_contended(lock, thread);
1561 }
1562 
1563 static boolean_t NOINLINE
1564 lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
1565 {
1566 	thread_t        holding_thread;
1567 	uintptr_t       state;
1568 	int             waiters;
1569 
1570 	interlock_lock(lock);
1571 	state = ordered_load_mtx(lock);
1572 	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
1573 	if (holding_thread) {
1574 		interlock_unlock(lock);
1575 		return FALSE;
1576 	}
1577 	waiters = lck_mtx_lock_acquire(lock, NULL);
1578 	state = LCK_MTX_THREAD_TO_STATE(thread);
1579 	if (waiters != 0) {
1580 		state |= ARM_LCK_WAITERS;
1581 	}
1582 	state |= LCK_ILOCK;                             // Preserve interlock
1583 	ordered_store_mtx(lock, state); // Set ownership
1584 	interlock_unlock(lock);                 // Release interlock, enable preemption
1585 	load_memory_barrier();
1586 
1587 	turnstile_cleanup();
1588 
1589 	return TRUE;
1590 }
1591 
1592 static inline boolean_t
1593 lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
1594 {
1595 	uintptr_t       state;
1596 
1597 	if (!interlock_try(lock)) {
1598 		return FALSE;
1599 	}
1600 	state = ordered_load_mtx(lock);
1601 	if (LCK_MTX_STATE_TO_THREAD(state)) {
1602 		// Lock is held as mutex
1603 		if (allow_held_as_mutex) {
1604 			interlock_unlock(lock);
1605 		} else {
1606 			// "Always" variants can never block. If the lock is held as a normal mutex
1607 			// then someone is mixing always and non-always calls on the same lock, which is
1608 			// forbidden.
1609 			panic("Spin-mutex held as full mutex %p", lock);
1610 		}
1611 		return FALSE;
1612 	}
1613 	state &= ARM_LCK_WAITERS;                                               // Preserve waiters bit
1614 	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);        // Add spin tag and maintain interlock
1615 	ordered_store_mtx(lock, state);
1616 	load_memory_barrier();
1617 
1618 #if     CONFIG_DTRACE
1619 	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
1620 #endif /* CONFIG_DTRACE */
1621 	return TRUE;
1622 }
1623 
1624 /*
1625  *	Routine: lck_mtx_try_lock_spin
1626  */
1627 boolean_t
1628 lck_mtx_try_lock_spin(lck_mtx_t *lock)
1629 {
1630 	return lck_mtx_try_lock_spin_internal(lock, TRUE);
1631 }
1632 
1633 /*
1634  *	Routine: lck_mtx_try_lock_spin_always
1635  */
1636 boolean_t
1637 lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
1638 {
1639 	return lck_mtx_try_lock_spin_internal(lock, FALSE);
1640 }
1641 
1642 
1643 
1644 /*
1645  *	Routine:	lck_mtx_unlock
1646  */
1647 void
1648 lck_mtx_unlock(lck_mtx_t *lock)
1649 {
1650 	thread_t        thread = current_thread();
1651 	uintptr_t       state;
1652 	boolean_t       ilk_held = FALSE;
1653 
1654 	lck_mtx_verify(lock);
1655 
1656 	state = ordered_load_mtx(lock);
1657 	if (state & LCK_ILOCK) {
1658 		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG) {
1659 			ilk_held = TRUE;        // Interlock is held by (presumably) this thread
1660 		}
1661 		goto slow_case;
1662 	}
1663 	// Locked as a mutex
1664 	if (os_atomic_cmpxchg(&lock->lck_mtx_data,
1665 	    LCK_MTX_THREAD_TO_STATE(thread), 0, release)) {
1666 #if     CONFIG_DTRACE
1667 		LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
1668 #endif /* CONFIG_DTRACE */
1669 		return;
1670 	}
1671 slow_case:
1672 	lck_mtx_unlock_contended(lock, thread, ilk_held);
1673 }
1674 
1675 static void NOINLINE
1676 lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
1677 {
1678 	uintptr_t       state;
1679 	boolean_t               cleanup = FALSE;
1680 
1681 	if (ilk_held) {
1682 		state = ordered_load_mtx(lock);
1683 	} else {
1684 		interlock_lock(lock);
1685 		state = ordered_load_mtx(lock);
1686 		if (thread != LCK_MTX_STATE_TO_THREAD(state)) {
1687 			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
1688 		}
1689 		if (state & ARM_LCK_WAITERS) {
1690 			if (lck_mtx_unlock_wakeup(lock, thread)) {
1691 				state = ARM_LCK_WAITERS;
1692 			} else {
1693 				state = 0;
1694 			}
1695 			cleanup = TRUE;
1696 			goto unlock;
1697 		}
1698 	}
1699 	state &= ARM_LCK_WAITERS;   /* Clear state, retain waiters bit */
1700 unlock:
1701 	state |= LCK_ILOCK;
1702 	ordered_store_mtx(lock, state);
1703 	interlock_unlock(lock);
1704 	if (cleanup) {
1705 		/*
1706 		 * Do not do any turnstile operations outside of this block.
1707 		 * lock/unlock is called at early stage of boot with single thread,
1708 		 * when turnstile is not yet initialized.
1709 		 * Even without contention we can come through the slow path
1710 		 * if the mutex is acquired as a spin lock.
1711 		 */
1712 		turnstile_cleanup();
1713 	}
1714 
1715 #if     CONFIG_DTRACE
1716 	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
1717 #endif /* CONFIG_DTRACE */
1718 }
1719 
1720 /*
1721  *	Routine:	lck_mtx_assert
1722  */
1723 void
1724 lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
1725 {
1726 	thread_t        thread, holder;
1727 	uintptr_t       state;
1728 
1729 	state = ordered_load_mtx(lock);
1730 	holder = LCK_MTX_STATE_TO_THREAD(state);
1731 	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
1732 		// Lock is held in spin mode, owner is unknown.
1733 		return; // Punt
1734 	}
1735 	thread = current_thread();
1736 	if (type == LCK_MTX_ASSERT_OWNED) {
1737 		if (thread != holder) {
1738 			panic("lck_mtx_assert(): mutex (%p) owned", lock);
1739 		}
1740 	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
1741 		if (thread == holder) {
1742 			panic("lck_mtx_assert(): mutex (%p) not owned", lock);
1743 		}
1744 	} else {
1745 		panic("lck_mtx_assert(): invalid arg (%u)", type);
1746 	}
1747 }
1748 
1749 /*
1750  *	Routine:	lck_mtx_ilk_unlock
1751  */
1752 boolean_t
1753 lck_mtx_ilk_unlock(lck_mtx_t *lock)
1754 {
1755 	interlock_unlock(lock);
1756 	return TRUE;
1757 }
1758 
1759 /*
1760  *	Routine:	lck_mtx_convert_spin
1761  *
1762  *	Convert a mutex held for spin into a held full mutex
1763  */
1764 void
1765 lck_mtx_convert_spin(lck_mtx_t *lock)
1766 {
1767 	thread_t        thread = current_thread();
1768 	uintptr_t       state;
1769 	int                     waiters;
1770 
1771 	state = ordered_load_mtx(lock);
1772 	if (LCK_MTX_STATE_TO_THREAD(state) == thread) {
1773 		return;         // Already owned as mutex, return
1774 	}
1775 	if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) {
1776 		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
1777 	}
1778 	state &= ~(LCK_MTX_THREAD_MASK);                // Clear the spin tag
1779 	ordered_store_mtx(lock, state);
1780 	waiters = lck_mtx_lock_acquire(lock, NULL);   // Acquire to manage priority boosts
1781 	state = LCK_MTX_THREAD_TO_STATE(thread);
1782 	if (waiters != 0) {
1783 		state |= ARM_LCK_WAITERS;
1784 	}
1785 	state |= LCK_ILOCK;
1786 	ordered_store_mtx(lock, state);                 // Set ownership
1787 	interlock_unlock(lock);                                 // Release interlock, enable preemption
1788 	turnstile_cleanup();
1789 }
1790 
1791 
1792 /*
1793  *      Routine:        lck_mtx_destroy
1794  */
1795 void
1796 lck_mtx_destroy(
1797 	lck_mtx_t * lck,
1798 	lck_grp_t * grp)
1799 {
1800 	if (lck->lck_mtx_type != LCK_MTX_TYPE) {
1801 		panic("Destroying invalid mutex %p", lck);
1802 	}
1803 	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
1804 		panic("Destroying previously destroyed lock %p", lck);
1805 	}
1806 	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
1807 	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
1808 	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
1809 	lck_grp_deallocate(grp);
1810 	return;
1811 }
1812 
1813 /*
1814  *	Routine:	lck_spin_assert
1815  */
1816 void
1817 lck_spin_assert(lck_spin_t *lock, unsigned int type)
1818 {
1819 	thread_t        thread, holder;
1820 	uintptr_t       state;
1821 
1822 	if (lock->type != LCK_SPIN_TYPE) {
1823 		panic("Invalid spinlock %p", lock);
1824 	}
1825 
1826 	state = lock->lck_spin_data;
1827 	holder = (thread_t)(state & ~LCK_ILOCK);
1828 	thread = current_thread();
1829 	if (type == LCK_ASSERT_OWNED) {
1830 		if (holder == 0) {
1831 			panic("Lock not owned %p = %lx", lock, state);
1832 		}
1833 		if (holder != thread) {
1834 			panic("Lock not owned by current thread %p = %lx", lock, state);
1835 		}
1836 		if ((state & LCK_ILOCK) == 0) {
1837 			panic("Lock bit not set %p = %lx", lock, state);
1838 		}
1839 	} else if (type == LCK_ASSERT_NOTOWNED) {
1840 		if (holder != 0) {
1841 			if (holder == thread) {
1842 				panic("Lock owned by current thread %p = %lx", lock, state);
1843 			}
1844 		}
1845 	} else {
1846 		panic("lck_spin_assert(): invalid arg (%u)", type);
1847 	}
1848 }
1849 
1850 /*
1851  * Routine: kdp_lck_mtx_lock_spin_is_acquired
1852  * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
1853  */
1854 boolean_t
1855 kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
1856 {
1857 	uintptr_t       state;
1858 
1859 	if (not_in_kdp) {
1860 		panic("panic: spinlock acquired check done outside of kernel debugger");
1861 	}
1862 	state = ordered_load_mtx(lck);
1863 	if (state == LCK_MTX_TAG_DESTROYED) {
1864 		return FALSE;
1865 	}
1866 	if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) {
1867 		return TRUE;
1868 	}
1869 	return FALSE;
1870 }
1871 
1872 void
1873 kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
1874 {
1875 	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
1876 	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
1877 	uintptr_t state   = ordered_load_mtx(mutex);
1878 	thread_t holder   = LCK_MTX_STATE_TO_THREAD(state);
1879 	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
1880 		waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
1881 	} else {
1882 		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
1883 #if LOCKS_INDIRECT_ALLOW
1884 		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
1885 #endif /* LOCKS_INDIRECT_ALLOW */
1886 		waitinfo->owner = thread_tid(holder);
1887 	}
1888 }
1889