/* xref: /xnu-10002.1.13/osfmk/arm/locks_arm.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a) */
/*
 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
 * Mellon University All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 * School of Computer Science Carnegie Mellon University Pittsburgh PA
 * 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <mach/machine/sdt.h>

#include <kern/locks_internal.h>
#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_hygiene.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <kern/percpu.h>
#include <kern/hvg_hypercall.h>
#include <string.h>
#include <arm/cpu_internal.h>
#include <os/hash.h>
#include <arm/cpu_data.h>

#include <arm/cpu_data_internal.h>
#include <arm64/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <pexpert/pexpert.h>

#include <sys/kdebug.h>

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or when a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

/* Forwards */

extern unsigned int not_in_kdp;

MACHINE_TIMEOUT(lock_panic_timeout, "lock-panic",
    0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

#define NOINLINE                __attribute__((noinline))

#define interrupts_disabled(mask) (mask & DAIF_IRQF)

KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

#pragma GCC visibility push(hidden)
/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
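/*
 * Illustrative sketch only (assumed caller, not part of this API): the
 * intended call pattern is a retry loop that either completes or aborts
 * every begin, e.g.
 *
 *	uint32_t old, new;
 *	for (;;) {
 *		new = atomic_exchange_begin32(&word, &old, memory_order_acquire);
 *		if (new & BUSY_BIT) {
 *			atomic_exchange_abort();        // drop the exclusive monitor
 *			break;
 *		}
 *		new |= BUSY_BIT;
 *		if (atomic_exchange_complete32(&word, old, new, memory_order_acquire)) {
 *			break;
 *		}
 *	}
 *
 * "word" and BUSY_BIT are hypothetical names used only for this sketch;
 * atomic_test_and_set32() below implements exactly this pattern.
 */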
uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t        value;

	if (_os_atomic_mo_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}

	return value;
}

boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t err;

	if (_os_atomic_mo_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}

	return !err;
}

uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t        val;

#if !OS_ATOMIC_USE_LLSC
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if !OS_ATOMIC_USE_LLSC
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}

boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t                value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}
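/*
 * Usage sketch (hypothetical caller): a spinning acquire path would call
 *
 *	atomic_test_and_set32(&word, BUSY_BIT, BUSY_BIT, memory_order_acquire, TRUE);
 *
 * and retry while it returns FALSE.  With wait == TRUE a contended caller
 * parks in wait_for_event() (WFE) with the exclusive monitor still armed,
 * so the owner's store to the word wakes it instead of leaving it spinning
 * hot.  "word" and BUSY_BIT are names invented for this sketch.
 */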

#pragma GCC visibility pop
#pragma mark preemption

/*
 * This function checks whether an AST_URGENT has been pended.
 *
 * It is called once the preemption has been reenabled, which means the thread
 * may have been preempted right before this was called, and when this function
 * actually performs the check, we've changed CPU.
 *
 * This race is however benign: the point of AST_URGENT is to trigger a context
 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
 * was cleared in the process.
 *
 * It follows that this check cannot have false negatives, which allows us
 * to avoid fiddling with interrupt state for the vast majority of cases
 * when the check will actually be negative.
 */
static NOINLINE void
kernel_preempt_check(void)
{
	uint64_t state;

	/* If interrupts are masked, we can't take an AST here */
	state = __builtin_arm_rsr64("DAIF");
	if (state & DAIF_IRQF) {
		return;
	}

	/* disable interrupts (IRQ FIQ ASYNCF) */
	__builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);

	/*
	 * Reload cpu_pending_ast: a context switch would cause it to change.
	 * Now that interrupts are disabled, this will debounce false positives.
	 */
	if (current_thread()->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
		ast_taken_kernel();
	}

	/* restore the original interrupt mask */
	__builtin_arm_wsr64("DAIF", state);
}

static inline void
_enable_preemption_write_count(thread_t thread, unsigned int count)
{
	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);

	/*
	 * This check is racy and could load from another CPU's pending_ast mask,
	 * but as described above, this can't have false negatives.
	 */
	if (count == 0) {
		if (__improbable(thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT)) {
			return kernel_preempt_check();
		}
	}
}

#if SCHED_HYGIENE_DEBUG

uint64_t _Atomic PERCPU_DATA_HACK_78750602(preemption_disable_max_mt);

#if XNU_PLATFORM_iPhoneOS
#define DEFAULT_PREEMPTION_TIMEOUT 120000 /* 5ms */
#define DEFAULT_PREEMPTION_MODE SCHED_HYGIENE_MODE_PANIC
#else
#define DEFAULT_PREEMPTION_TIMEOUT 0      /* Disabled */
#define DEFAULT_PREEMPTION_MODE SCHED_HYGIENE_MODE_OFF
#endif /* XNU_PLATFORM_iPhoneOS */
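/*
 * For scale: assuming the same 24MHz timebase referenced for
 * lock_panic_timeout above, 120000 ticks / 24,000,000 ticks per second
 * = 5ms, which is the annotation on DEFAULT_PREEMPTION_TIMEOUT.
 */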

MACHINE_TIMEOUT_DEV_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption",
    DEFAULT_PREEMPTION_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);
TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
    "machine-timeouts",
    "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
    "sched_preemption_disable_debug_mode",
    DEFAULT_PREEMPTION_MODE,
    TUNABLE_DT_CHECK_CHOSEN);

static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;

NOINLINE void
_prepare_preemption_disable_measurement(void)
{
	thread_t thread = current_thread();

	if (thread->machine.inthandler_timestamp == 0) {
		/*
		 * Only prepare a measurement if not currently in an interrupt
		 * handler.
		 *
		 * We are only interested in the net duration of disabled
		 * preemption, that is: The time in which preemption was
		 * disabled, minus the intervals in which any (likely
		 * unrelated) interrupts were handled.
		 * ml_adjust_preemption_disable_time() will remove those
		 * intervals, however we also do not even start measuring
		 * preemption disablement if we are already within handling of
		 * an interrupt when preemption was disabled (the resulting
		 * net time would be 0).
		 *
		 * Interrupt handling duration is handled separately, and any
		 * long intervals of preemption disablement are counted
		 * towards that.
		 */

		bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
		thread->machine.preemption_disable_abandon = false;
		thread->machine.preemption_disable_mt = ml_get_sched_hygiene_timebase();
		thread->machine.preemption_disable_adjust = 0;
		thread->machine.preemption_count |= SCHED_HYGIENE_MARKER;
#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			mt_cur_cpu_cycles_instrs_speculative(&thread->machine.preemption_disable_cycles, &thread->machine.preemption_disable_instr);
		}
#endif
		ml_set_interrupts_enabled_with_debug(istate, false);
	}
}

NOINLINE void
_collect_preemption_disable_measurement(void)
{
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
	/*
	 * Collect start time and current time with interrupts disabled.
	 * Otherwise an interrupt coming in after grabbing the timestamp
	 * could spuriously inflate the measurement, because it will
	 * adjust preemption_disable_mt only after we already grabbed
	 * it.
	 *
	 * (Even worse if we collected the current time first: Then a
	 * subsequent interrupt could adjust preemption_disable_mt to
	 * make the duration go negative after subtracting the already
	 * grabbed time. With interrupts disabled we don't care much about
	 * the order.)
	 */

	thread_t thread = current_thread();
	uint64_t const mt = thread->machine.preemption_disable_mt;
	uint64_t const adjust = thread->machine.preemption_disable_adjust;
	uint64_t const now = ml_get_sched_hygiene_timebase();
	thread->machine.preemption_disable_mt = 0;
	thread->machine.preemption_disable_adjust = 0;
	/* no need to clear SCHED_HYGIENE_MARKER, will be done on exit */

	/*
	 * Don't need to reset (or even save) preemption_disable_abandon
	 * here: abandon_preemption_disable_measurement is a no-op anyway
	 * if preemption_disable_mt == 0 (which we just set), and it
	 * will stay that way until the next call to
	 * _collect_preemption_disable_measurement.
	 */

	os_compiler_barrier(acq_rel);

	ml_set_interrupts_enabled_with_debug(istate, false);

	/*
	 * Fine to get with interrupts enabled:
	 * Above we set preemption_disable_mt to 0, which turns
	 * abandon_preemption_disable_measurement() into a no-op
	 * until the next collection starts.
	 */
	if (thread->machine.preemption_disable_abandon) {
		goto out;
	}

	int64_t const gross_duration = now - mt;
	int64_t const net_duration = gross_duration - adjust;

	uint64_t _Atomic * const max_duration = PERCPU_GET(preemption_disable_max_mt);

	if (__improbable(net_duration > *max_duration)) {
		os_atomic_store(max_duration, net_duration, relaxed);
	}

	uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
	if (__improbable(threshold > 0 && net_duration >= threshold)) {
		uint64_t average_freq = 0;
		uint64_t average_cpi_whole = 0;
		uint64_t average_cpi_fractional = 0;

#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			uint64_t current_cycles = 0, current_instrs = 0;

			/*
			 * We're getting these values a bit late, but getting them
			 * is a bit expensive, so we take the slight hit in
			 * accuracy for the reported values (which aren't very
			 * stable anyway).
			 */
			istate = ml_set_interrupts_enabled_with_debug(false, false);
			mt_cur_cpu_cycles_instrs_speculative(&current_cycles, &current_instrs);
			ml_set_interrupts_enabled_with_debug(istate, false);

			uint64_t duration_ns;
			absolutetime_to_nanoseconds(gross_duration, &duration_ns);

			average_freq = (current_cycles - thread->machine.preemption_disable_cycles) / (duration_ns / 1000);
			average_cpi_whole = (current_cycles - thread->machine.preemption_disable_cycles) / (current_instrs - thread->machine.preemption_disable_instr);
			average_cpi_fractional =
			    (((current_cycles - thread->machine.preemption_disable_cycles) * 100) / (current_instrs - thread->machine.preemption_disable_instr)) % 100;
		}
#endif

		if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
			panic("preemption disable timeout exceeded: %llu >= %llu mt ticks (start: %llu, now: %llu, gross: %llu, inttime: %llu), "
			    "freq = %llu MHz, CPI = %llu.%llu",
			    net_duration, threshold, mt, now, gross_duration, adjust,
			    average_freq, average_cpi_whole, average_cpi_fractional);
		}

		DTRACE_SCHED4(mach_preemption_expired, uint64_t, net_duration, uint64_t, gross_duration,
		    uint64_t, average_cpi_whole, uint64_t, average_cpi_fractional);
		if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
			KDBG(sched_preemption_disable_debug_dbgid, net_duration, gross_duration, average_cpi_whole, average_cpi_fractional);
		}
	}

out:
	/*
	 * the preemption count is SCHED_HYGIENE_MARKER, we need to clear it.
	 */
	_enable_preemption_write_count(thread, 0);
}
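/*
 * Worked example for the CPI split computed above (illustrative numbers):
 * with a delta of 3000 cycles and 2000 instructions,
 * average_cpi_whole = 3000 / 2000 = 1 and
 * average_cpi_fractional = ((3000 * 100) / 2000) % 100 = 50,
 * so the panic/tracepoint reports CPI = 1.50.
 */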

/*
 * Abandon a potential preemption disable measurement. Useful for
 * example for the idle thread, which would just spuriously
 * trigger the threshold while actually idling, which we don't
 * care about.
 */
void
abandon_preemption_disable_measurement(void)
{
	thread_t t = current_thread();
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp

	if (t->machine.preemption_disable_mt != 0) {
		t->machine.preemption_disable_abandon = true;
	}
	ml_set_interrupts_enabled_with_debug(istate, false);
}

/*
 * Skip predicate for sched_preemption_disable, which would trigger
 * spuriously when kprintf spam is enabled.
 */
bool
kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
{
	bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
	return kprintf_spam_enabled;
}

/*
 * Abandon function exported for AppleCLPC, as a workaround to rdar://91668370.
 *
 * Only for AppleCLPC!
 */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	abandon_preemption_disable_measurement();
}

#else /* SCHED_HYGIENE_DEBUG */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	// No-op. Function is exported, so needs to be defined
}
#endif /* SCHED_HYGIENE_DEBUG */

/*
 * This function is written in a way that the codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers use current_thread() already.
 *
 * TODO: It is unfortunate that we have to load
 *       sched_preemption_disable_debug_mode
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_disable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);

#if SCHED_HYGIENE_DEBUG
	/*
	 * Note that this is not the only place preemption gets disabled,
	 * it also gets modified on ISR and PPL entry/exit. Both of those
	 * events will be treated specially however, and
	 * increment/decrement being paired around their entry/exit means
	 * that collection here is not desynced otherwise.
	 */

	if (__improbable(count == 0 && sched_preemption_disable_debug_mode)) {
		__attribute__((musttail))
		return _prepare_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */
}


/*
 * This variant of disable_preemption() allows disabling preemption
 * without taking measurements (and later potentially triggering
 * actions on those).
 */
__attribute__((always_inline))
void
_disable_preemption_without_measurements(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * Inform _collect_preemption_disable_measurement()
	 * that we didn't really care.
	 */
	thread->machine.preemption_disable_abandon = true;
#endif

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);
}

/*
 * To help _enable_preemption() inline everywhere with LTO,
 * we keep these nice non inlineable functions as the panic()
 * codegen setup is quite large and for weird reasons causes a frame.
 */
__abortlike
static void
_enable_preemption_underflow(void)
{
	panic("Preemption count underflow");
}

/*
 * This function is written in a way that the codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers use current_thread() already.
 *
 * The SCHED_HYGIENE_MARKER trick is used so that we do not have to load
 * unrelated fields of current_thread().
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_enable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count  = thread->machine.preemption_count;

	if (__improbable(count == 0)) {
		_enable_preemption_underflow();
	}

#if SCHED_HYGIENE_DEBUG
	if (__improbable(count == SCHED_HYGIENE_MARKER + 1)) {
		return _collect_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */

	_enable_preemption_write_count(thread, count - 1);
}

__attribute__((always_inline))
unsigned int
get_preemption_level_for_thread(thread_t thread)
{
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * hide this "flag" from callers,
	 * and it would make the count look negative anyway
	 * which some people dislike
	 */
	count &= ~SCHED_HYGIENE_MARKER;
#endif
	return (int)count;
}

__attribute__((always_inline))
int
get_preemption_level(void)
{
	return get_preemption_level_for_thread(current_thread());
}
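/*
 * Illustrative pairing (hypothetical caller): every _disable_preemption()
 * must be balanced by an _enable_preemption() on the same thread:
 *
 *	_disable_preemption();
 *	assert(get_preemption_level() > 0);
 *	// ... access per-CPU state safely ...
 *	_enable_preemption();
 *
 * The call that drops the count back to zero is the one where
 * _enable_preemption_write_count() checks for a pending AST_URGENT and
 * enters kernel_preempt_check().
 */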

#if CONFIG_PV_TICKET
__startup_func
void
lck_init_pv(void)
{
	uint32_t pvtck = 1;
	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
	if (pvtck == 0) {
		return;
	}
	has_lock_pv = hvg_is_hcall_available(HVG_HCALL_VCPU_WFK) &&
	    hvg_is_hcall_available(HVG_HCALL_VCPU_KICK);
}
STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
#endif


#pragma mark lck_spin_t
#if LCK_SPIN_IS_TICKET_LOCK

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck_ticket_init(lck, grp);
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck_ticket_init((lck_ticket_t *)lck, LCK_GRP_NULL);
}

void
lck_spin_assert(const lck_spin_t *lock, unsigned int type)
{
	if (type == LCK_ASSERT_OWNED) {
		lck_ticket_assert_owned(lock);
	} else if (type == LCK_ASSERT_NOTOWNED) {
		lck_ticket_assert_not_owned(lock);
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_ticket_lock(lock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_ticket_lock_nopreempt(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	return lck_ticket_lock_try(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	return lck_ticket_lock_try_nopreempt(lock, LCK_GRP_NULL);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_ticket_unlock(lock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_ticket_destroy(lck, grp);
}

/*
 * those really should be in an alias file instead,
 * but you can't make that conditional.
 *
 * it will be good enough for perf evals for now
 *
 * we also can't make aliases for symbols that
 * are in alias files like lck_spin_init and friends,
 * so this suffers double jump penalties for kexts
 * (LTO does the right thing for XNU).
 */
#define make_alias(a, b) asm(".globl _" #a "\n" ".set   _" #a ", _" #b "\n")
make_alias(lck_spin_lock_grp, lck_ticket_lock);
make_alias(lck_spin_lock_nopreempt_grp, lck_ticket_lock_nopreempt);
make_alias(lck_spin_try_lock_grp, lck_ticket_lock_try);
make_alias(lck_spin_try_lock_nopreempt_grp, lck_ticket_lock_try_nopreempt);
make_alias(lck_spin_unlock_nopreempt, lck_ticket_unlock_nopreempt);
make_alias(kdp_lck_spin_is_acquired, kdp_lck_ticket_is_acquired);
#undef make_alias
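/*
 * For clarity, the first alias above expands to
 *
 *	asm(".globl _lck_spin_lock_grp\n"
 *	    ".set   _lck_spin_lock_grp, _lck_ticket_lock\n");
 *
 * i.e. lck_spin_lock_grp becomes an assembler-level alias of
 * lck_ticket_lock rather than a C wrapper, avoiding an extra jump for
 * in-kernel callers (kexts still take the double jump noted above).
 */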

#else /* !LCK_SPIN_IS_TICKET_LOCK */

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_spin_invalid_panic(lck_spin_t *lck)
{
	const char *how = "Invalid";

	if (lck->type == LCK_SPIN_TYPE_DESTROYED ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		how = "Destroyed";
	}

	panic("%s spinlock %p: <0x%016lx 0x%16lx>",
	    how, lck, lck->lck_spin_data, lck->type);
}

static inline void
lck_spin_verify(lck_spin_t *lck)
{
	if (lck->type != LCK_SPIN_TYPE ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		__lck_spin_invalid_panic(lck);
	}
}
#else /* DEVELOPMENT || DEBUG */
#define lck_spin_verify(lck)            ((void)0)
#endif /* DEVELOPMENT || DEBUG */

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	if (grp) {
		lck_grp_reference(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
}

void
lck_spin_assert(const lck_spin_t *lock, unsigned int type)
{
	thread_t thread, holder;

	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}

	holder = HW_LOCK_STATE_TO_THREAD(lock->lck_spin_data);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, grp);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, grp);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, grp);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, grp);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock(&lock->hwlock);
}

void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock_nopreempt(&lock->hwlock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_verify(lck);
	*lck = (lck_spin_t){
		.lck_spin_data = LCK_SPIN_TAG_DESTROYED,
		.type = LCK_SPIN_TYPE_DESTROYED,
	};
	if (grp) {
		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
}

#endif /* !LCK_SPIN_IS_TICKET_LOCK */

/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t l,
	unsigned short tag)
{
	simple_lock_init((simple_lock_t) l, tag);
}


/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
}


/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
(usimple_unlock)(
	usimple_lock_t l)
{
	simple_unlock((simple_lock_t)l);
}


/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned
int
(usimple_lock_try)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	return simple_lock_try((simple_lock_t) l, grp);
}
994