/*
 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
 * Mellon University All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator  or  [email protected]
 * School of Computer Science Carnegie Mellon University Pittsburgh PA
 * 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <mach/machine/sdt.h>

#include <kern/locks_internal.h>
#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_hygiene.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <kern/percpu.h>
#include <kern/hvg_hypercall.h>
#include <string.h>
#include <arm/cpu_internal.h>
#include <os/hash.h>
#include <arm/cpu_data.h>

#include <arm/cpu_data_internal.h>
#include <arm64/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <pexpert/pexpert.h>

#include <sys/kdebug.h>

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness.
// These are undesirable when already in a panic or when a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

/* Forwards */

extern unsigned int not_in_kdp;

MACHINE_TIMEOUT(lock_panic_timeout, "lock-panic",
    0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

#define NOINLINE                __attribute__((noinline))

#define interrupts_disabled(mask) (mask & DAIF_IRQF)

KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

#pragma GCC visibility push(hidden)
/*
 * The atomic exchange API is a low-level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
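/*
 * Illustrative sketch (an assumption about typical usage, mirroring
 * atomic_test_and_set32() below, not an additional primitive): a caller pairs
 * atomic_exchange_begin32() and atomic_exchange_complete32() in a retry loop,
 * and calls atomic_exchange_abort() when it decides not to store.  The names
 * "word" and "BUSY" are hypothetical.
 *
 *	uint32_t value, prev;
 *	for (;;) {
 *		value = atomic_exchange_begin32(&word, &prev, memory_order_acquire);
 *		if (value & BUSY) {
 *			atomic_exchange_abort();	// drop the exclusive monitor
 *			return FALSE;
 *		}
 *		value |= BUSY;
 *		if (atomic_exchange_complete32(&word, prev, value, memory_order_acquire)) {
 *			return TRUE;
 *		}
 *	}
 */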
uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t        value;

	if (_os_atomic_mo_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}

	return value;
}

boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t err;

	if (_os_atomic_mo_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}

	return !err;
}

uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t        val;

#if !OS_ATOMIC_USE_LLSC
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if !OS_ATOMIC_USE_LLSC
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}

boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t                value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

#pragma GCC visibility pop
#pragma mark preemption

/*
 * This function checks whether an AST_URGENT has been pended.
 *
 * It is called once preemption has been re-enabled, which means the thread
 * may have been preempted right before this was called, so by the time this
 * function actually performs the check, we may have changed CPUs.
 *
 * This race is however benign: the point of AST_URGENT is to trigger a context
 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
 * was cleared in the process.
 *
 * It follows that this check cannot have false negatives, which allows us
 * to avoid fiddling with interrupt state for the vast majority of cases
 * when the check will actually be negative.
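 *
 * For example: preemption is re-enabled on CPU A while AST_URGENT is pending,
 * the thread is preempted right away and later resumes on CPU B.  That
 * context switch is exactly what AST_URGENT was meant to trigger, and it
 * cleared the AST along the way, so finding nothing pending on CPU B is the
 * correct answer rather than a missed preemption.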
 */
static NOINLINE void
kernel_preempt_check(void)
{
	uint64_t state;

	/* If interrupts are masked, we can't take an AST here */
	state = __builtin_arm_rsr64("DAIF");
	if (state & DAIF_IRQF) {
		return;
	}

	/* disable interrupts (IRQ FIQ ASYNCF) */
	__builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);

	/*
	 * Reload cpu_pending_ast: a context switch would cause it to change.
	 * Now that interrupts are disabled, this will debounce false positives.
	 */
	if (current_thread()->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
		ast_taken_kernel();
	}

	/* restore the original interrupt mask */
	__builtin_arm_wsr64("DAIF", state);
}

static inline void
_enable_preemption_write_count(thread_t thread, unsigned int count)
{
	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);

	/*
	 * This check is racy and could load from another CPU's pending_ast mask,
	 * but as described above, this can't have false negatives.
	 */
	if (count == 0) {
		if (__improbable(thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT)) {
			return kernel_preempt_check();
		}
	}
}

#if SCHED_HYGIENE_DEBUG

uint64_t _Atomic PERCPU_DATA_HACK_78750602(preemption_disable_max_mt);

MACHINE_TIMEOUT_DEV_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);

TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
    "machine-timeouts",
    "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
    "sched_preemption_disable_debug_mode",
    SCHED_HYGIENE_MODE_OFF,
    TUNABLE_DT_CHECK_CHOSEN);

static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;

NOINLINE void
_prepare_preemption_disable_measurement(void)
{
	thread_t thread = current_thread();

	if (thread->machine.inthandler_timestamp == 0) {
		/*
		 * Only prepare a measurement if not currently in an interrupt
		 * handler.
		 *
		 * We are only interested in the net duration of disabled
		 * preemption, that is: The time in which preemption was
		 * disabled, minus the intervals in which any (likely
		 * unrelated) interrupts were handled.
		 * ml_adjust_preemption_disable_time() will remove those
		 * intervals, however we also do not even start measuring
		 * preemption disablement if we are already within handling of
		 * an interrupt when preemption was disabled (the resulting
		 * net time would be 0).
		 *
		 * Interrupt handling duration is handled separately, and any
		 * long intervals of preemption disablement are counted
		 * towards that.
		 */

		bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
		thread->machine.preemption_disable_abandon = false;
		thread->machine.preemption_disable_mt = ml_get_sched_hygiene_timebase();
		thread->machine.preemption_disable_adjust = 0;
		thread->machine.preemption_count |= SCHED_HYGIENE_MARKER;
#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			mt_cur_cpu_cycles_instrs_speculative(&thread->machine.preemption_disable_cycles, &thread->machine.preemption_disable_instr);
		}
#endif
		ml_set_interrupts_enabled_with_debug(istate, false);
	}
}

NOINLINE void
_collect_preemption_disable_measurement(void)
{
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
	/*
	 * Collect start time and current time with interrupts disabled.
	 * Otherwise an interrupt coming in after grabbing the timestamp
	 * could spuriously inflate the measurement, because it will
	 * adjust preemption_disable_mt only after we already grabbed
	 * it.
	 *
	 * (Even worse if we collected the current time first: Then a
	 * subsequent interrupt could adjust preemption_disable_mt to
	 * make the duration go negative after subtracting the already
	 * grabbed time. With interrupts disabled we don't care much about
	 * the order.)
	 */

	thread_t thread = current_thread();
	uint64_t const mt = thread->machine.preemption_disable_mt;
	uint64_t const adjust = thread->machine.preemption_disable_adjust;
	uint64_t const now = ml_get_sched_hygiene_timebase();
	thread->machine.preemption_disable_mt = 0;
	thread->machine.preemption_disable_adjust = 0;
	/* no need to clear SCHED_HYGIENE_MARKER, will be done on exit */

	/*
	 * Don't need to reset (or even save) preemption_disable_abandon
	 * here: abandon_preemption_disable_measurement is a no-op anyway
	 * if preemption_disable_mt == 0 (which we just set), and it
	 * will stay that way until the next call to
	 * _collect_preemption_disable_measurement.
	 */

	os_compiler_barrier(acq_rel);

	ml_set_interrupts_enabled_with_debug(istate, false);

	/*
	 * Fine to get with interrupts enabled:
	 * Above we set preemption_disable_mt to 0, which turns
	 * abandon_preemption_disable_measurement() into a no-op
	 * until the next collection starts.
	 */
	if (thread->machine.preemption_disable_abandon) {
		goto out;
	}

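	/*
	 * gross_duration is the wall-clock time (in timebase ticks) for which
	 * preemption stayed disabled; adjust is the portion of that window that
	 * was spent handling interrupts (removed via
	 * ml_adjust_preemption_disable_time(), see
	 * _prepare_preemption_disable_measurement()); the net difference is what
	 * gets compared against the threshold below.
	 */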
	int64_t const gross_duration = now - mt;
	int64_t const net_duration = gross_duration - adjust;

	uint64_t _Atomic * const max_duration = PERCPU_GET(preemption_disable_max_mt);

	if (__improbable(net_duration > *max_duration)) {
		os_atomic_store(max_duration, net_duration, relaxed);
	}

	uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
	if (__improbable(threshold > 0 && net_duration >= threshold)) {
		uint64_t average_freq = 0;
		uint64_t average_cpi_whole = 0;
		uint64_t average_cpi_fractional = 0;

#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			uint64_t current_cycles = 0, current_instrs = 0;

			/*
			 * We're getting these values a bit late, but getting them
			 * is a bit expensive, so we take the slight hit in
			 * accuracy for the reported values (which aren't very
			 * stable anyway).
			 */
			istate = ml_set_interrupts_enabled_with_debug(false, false);
			mt_cur_cpu_cycles_instrs_speculative(&current_cycles, &current_instrs);
			ml_set_interrupts_enabled_with_debug(istate, false);

			uint64_t duration_ns;
			absolutetime_to_nanoseconds(gross_duration, &duration_ns);

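			/*
			 * duration_ns / 1000 is the gross window in microseconds, so
			 * cycles per microsecond approximates the average frequency
			 * in MHz; CPI is reported as a "whole.fractional" pair with
			 * two fractional decimal digits.
			 */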
			average_freq = (current_cycles - thread->machine.preemption_disable_cycles) / (duration_ns / 1000);
			average_cpi_whole = (current_cycles - thread->machine.preemption_disable_cycles) / (current_instrs - thread->machine.preemption_disable_instr);
			average_cpi_fractional =
			    (((current_cycles - thread->machine.preemption_disable_cycles) * 100) / (current_instrs - thread->machine.preemption_disable_instr)) % 100;
		}
#endif

		if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
			panic("preemption disable timeout exceeded: %llu >= %llu mt ticks (start: %llu, now: %llu, gross: %llu, inttime: %llu), "
			    "freq = %llu MHz, CPI = %llu.%llu",
			    net_duration, threshold, mt, now, gross_duration, adjust,
			    average_freq, average_cpi_whole, average_cpi_fractional);
		}

		DTRACE_SCHED4(mach_preemption_expired, uint64_t, net_duration, uint64_t, gross_duration,
		    uint64_t, average_cpi_whole, uint64_t, average_cpi_fractional);
		if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
			KDBG(sched_preemption_disable_debug_dbgid, net_duration, gross_duration, average_cpi_whole, average_cpi_fractional);
		}
	}

out:
	/*
	 * the preemption count is SCHED_HYGIENE_MARKER, we need to clear it.
	 */
	_enable_preemption_write_count(thread, 0);
}

/*
 * Abandon a potential preemption disable measurement. Useful for
 * example for the idle thread, which would just spuriously
 * trigger the threshold while actually idling, which we don't
 * care about.
 */
void
abandon_preemption_disable_measurement(void)
{
	thread_t t = current_thread();
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp

	if (t->machine.preemption_disable_mt != 0) {
		t->machine.preemption_disable_abandon = true;
	}
	ml_set_interrupts_enabled_with_debug(istate, false);
}

/*
 * Skip predicate for sched_preemption_disable, which would trigger
 * spuriously when kprintf spam is enabled.
 */
bool
kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
{
	bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
	return kprintf_spam_enabled;
}

/*
 * Abandon function exported for AppleCLPC, as a workaround to rdar://91668370.
 *
 * Only for AppleCLPC!
 */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	abandon_preemption_disable_measurement();
}

#else /* SCHED_HYGIENE_DEBUG */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	// No-op. Function is exported, so needs to be defined
}
#endif /* SCHED_HYGIENE_DEBUG */

/*
 * This function is written so that its codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers use current_thread() already.
 *
 * TODO: It is unfortunate that we have to load
 *       sched_preemption_disable_debug_mode
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_disable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);

#if SCHED_HYGIENE_DEBUG
	/*
	 * Note that this is not the only place preemption gets disabled;
	 * it also gets modified on ISR and PPL entry/exit. Both of those
	 * events will be treated specially however, and
	 * increment/decrement being paired around their entry/exit means
	 * that collection here is not desynced otherwise.
	 */

	if (__improbable(count == 0 && sched_preemption_disable_debug_mode)) {
		__attribute__((musttail))
		return _prepare_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */
}


/*
 * This variant of disable_preemption() allows disabling preemption
 * without taking measurements (and later potentially triggering
 * actions on those).
 */
__attribute__((always_inline))
void
_disable_preemption_without_measurements(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * Inform _collect_preemption_disable_measurement()
	 * that we didn't really care.
	 */
	thread->machine.preemption_disable_abandon = true;
#endif

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);
}

/*
 * To help _enable_preemption() inline everywhere with LTO,
 * we keep these nice non-inlineable functions, as the panic()
 * codegen setup is quite large and for weird reasons causes a frame.
 */
__abortlike
static void
_enable_preemption_underflow(void)
{
	panic("Preemption count underflow");
}

/*
 * This function is written so that its codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers use current_thread() already.
 *
 * The SCHED_HYGIENE_MARKER trick is used so that we do not have to load
 * unrelated fields of current_thread().
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_enable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	if (__improbable(count == 0)) {
		_enable_preemption_underflow();
	}

#if SCHED_HYGIENE_DEBUG
	if (__improbable(count == SCHED_HYGIENE_MARKER + 1)) {
		return _collect_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */

	_enable_preemption_write_count(thread, count - 1);
}

__attribute__((always_inline))
unsigned int
get_preemption_level_for_thread(thread_t thread)
{
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * Hide this "flag" from callers; leaving it in would also make the
	 * count look negative, which some people dislike.
	 */
	count &= ~SCHED_HYGIENE_MARKER;
#endif
	return (int)count;
}

__attribute__((always_inline))
int
get_preemption_level(void)
{
	return get_preemption_level_for_thread(current_thread());
}

#if CONFIG_PV_TICKET
__startup_func
void
lck_init_pv(void)
{
	uint32_t pvtck = 1;
	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
	if (pvtck == 0) {
		return;
	}
	has_lock_pv = hvg_is_hcall_available(HVG_HCALL_VCPU_WFK) &&
	    hvg_is_hcall_available(HVG_HCALL_VCPU_KICK);
}
STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
#endif


#pragma mark lck_spin_t
#if LCK_SPIN_IS_TICKET_LOCK

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck_ticket_init(lck, grp);
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck_ticket_init((lck_ticket_t *)lck, LCK_GRP_NULL);
}

void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	if (type == LCK_ASSERT_OWNED) {
		lck_ticket_assert_owned(lock);
	} else if (type == LCK_ASSERT_NOTOWNED) {
		lck_ticket_assert_not_owned(lock);
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_ticket_lock(lock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_ticket_lock_nopreempt(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	return lck_ticket_lock_try(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	return lck_ticket_lock_try_nopreempt(lock, LCK_GRP_NULL);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_ticket_unlock(lock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_ticket_destroy(lck, grp);
}

/*
 * those really should be in an alias file instead,
 * but you can't make that conditional.
 *
 * it will be good enough for perf evals for now
 *
 * we also can't make aliases for symbols that
 * are in alias files like lck_spin_init and friends,
 * so this suffers double jump penalties for kexts
 * (LTO does the right thing for XNU).
 */
#define make_alias(a, b) asm(".globl _" #a "\n" ".set   _" #a ", _" #b "\n")
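/*
 * For instance, make_alias(lck_spin_lock_grp, lck_ticket_lock) expands to an
 * asm() directive equivalent to:
 *
 *	.globl _lck_spin_lock_grp
 *	.set   _lck_spin_lock_grp, _lck_ticket_lock
 *
 * i.e. each lck_spin_* symbol below is emitted as an assembler alias of the
 * corresponding lck_ticket_* implementation.
 */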
make_alias(lck_spin_lock_grp, lck_ticket_lock);
make_alias(lck_spin_lock_nopreempt_grp, lck_ticket_lock_nopreempt);
make_alias(lck_spin_try_lock_grp, lck_ticket_lock_try);
make_alias(lck_spin_try_lock_nopreempt_grp, lck_ticket_lock_try_nopreempt);
make_alias(lck_spin_unlock_nopreempt, lck_ticket_unlock_nopreempt);
make_alias(kdp_lck_spin_is_acquired, kdp_lck_ticket_is_acquired);
#undef make_alias

#else /* !LCK_SPIN_IS_TICKET_LOCK */

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_spin_invalid_panic(lck_spin_t *lck)
{
	const char *how = "Invalid";

	if (lck->type == LCK_SPIN_TYPE_DESTROYED ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		how = "Destroyed";
	}

	panic("%s spinlock %p: <0x%016lx 0x%16lx>",
	    how, lck, lck->lck_spin_data, lck->type);
}

static inline void
lck_spin_verify(lck_spin_t *lck)
{
	if (lck->type != LCK_SPIN_TYPE ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		__lck_spin_invalid_panic(lck);
	}
}
#else /* DEVELOPMENT || DEBUG */
#define lck_spin_verify(lck)            ((void)0)
#endif /* DEVELOPMENT || DEBUG */

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	if (grp) {
		lck_grp_reference(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
}

void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t thread, holder;

	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}

	holder = HW_LOCK_STATE_TO_THREAD(lock->lck_spin_data);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, grp);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, grp);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, grp);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, grp);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock(&lock->hwlock);
}

void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock_nopreempt(&lock->hwlock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_verify(lck);
	*lck = (lck_spin_t){
		.lck_spin_data = LCK_SPIN_TAG_DESTROYED,
		.type = LCK_SPIN_TYPE_DESTROYED,
	};
	if (grp) {
		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
}

#endif /* !LCK_SPIN_IS_TICKET_LOCK */

/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t l,
	unsigned short tag)
{
	simple_lock_init((simple_lock_t) l, tag);
}


/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
}


/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
(usimple_unlock)(
	usimple_lock_t l)
{
	simple_unlock((simple_lock_t)l);
}


/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned
int
(usimple_lock_try)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	return simple_lock_try((simple_lock_t) l, grp);
}
