1 /*
2 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
33 * Mellon University All Rights Reserved.
34 *
35 * Permission to use, copy, modify and distribute this software and its
36 * documentation is hereby granted, provided that both the copyright notice
37 * and this permission notice appear in all copies of the software,
38 * derivative works or modified versions, and any portions thereof, and that
39 * both notices appear in supporting documentation.
40 *
41 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
42 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
43 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
44 *
45 * Carnegie Mellon requests users of this software to return to
46 *
47 * Software Distribution Coordinator or [email protected]
48 * School of Computer Science Carnegie Mellon University Pittsburgh PA
49 * 15213-3890
50 *
51 * any improvements or extensions that they make and grant Carnegie Mellon the
52 * rights to redistribute these changes.
53 */
54 /*
55 * File: kern/lock.c
56 * Author: Avadis Tevanian, Jr., Michael Wayne Young
57 * Date: 1985
58 *
59 * Locking primitives implementation
60 */
61
62 #define LOCK_PRIVATE 1
63
64 #include <mach_ldebug.h>
65
66 #include <mach/machine/sdt.h>
67
68 #include <kern/locks_internal.h>
69 #include <kern/zalloc.h>
70 #include <kern/lock_stat.h>
71 #include <kern/locks.h>
72 #include <kern/misc_protos.h>
73 #include <kern/thread.h>
74 #include <kern/processor.h>
75 #include <kern/sched_hygiene.h>
76 #include <kern/sched_prim.h>
77 #include <kern/debug.h>
78 #include <kern/kcdata.h>
79 #include <kern/percpu.h>
80 #include <kern/hvg_hypercall.h>
81 #include <string.h>
82 #include <arm/cpu_internal.h>
83 #include <os/hash.h>
84 #include <arm/cpu_data.h>
85
86 #include <arm/cpu_data_internal.h>
87 #include <arm64/proc_reg.h>
88 #include <arm/smp.h>
89 #include <machine/atomic.h>
90 #include <machine/machine_cpu.h>
91
92 #include <pexpert/pexpert.h>
93
94 #include <sys/kdebug.h>
95
96 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
97
98 // Panic in tests that check lock usage correctness
99 // These are undesirable when already in a panic or while a debugger is running.
100 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
101
102 /* Forwards */
103
104 extern unsigned int not_in_kdp;
105
106 MACHINE_TIMEOUT(lock_panic_timeout, "lock-panic",
107 0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
108
109 #define NOINLINE __attribute__((noinline))
110
111 #define interrupts_disabled(mask) (mask & DAIF_IRQF)
112
113 KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);
114
115 #pragma GCC visibility push(hidden)
116 /*
117 * atomic exchange API is a low level abstraction of the operations
118 * to atomically read, modify, and write a pointer. This abstraction works
119 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
120 * well as the ARM exclusive instructions.
121 *
122 * atomic_exchange_begin() - begin exchange and retrieve current value
123 * atomic_exchange_complete() - conclude an exchange
124 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
125 */
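/*
 * Illustrative sketch (not compiled; "busy_bits" is hypothetical): the
 * intended usage pattern is a begin/modify/complete retry loop, aborting
 * the exchange when the caller decides not to store anything. This mirrors
 * atomic_test_and_set32() further below.
 *
 *	uint32_t old, new;
 *	for (;;) {
 *		new = atomic_exchange_begin32(target, &old, memory_order_acquire);
 *		if (new & busy_bits) {
 *			atomic_exchange_abort();        // drop the exclusive monitor
 *			return FALSE;                   // or back off and retry
 *		}
 *		new |= busy_bits;
 *		if (atomic_exchange_complete32(target, old, new, memory_order_acquire)) {
 *			return TRUE;                    // store succeeded
 *		}
 *		// complete failed (another CPU intervened): start over
 *	}
 */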
126 uint32_t
127 load_exclusive32(uint32_t *target, enum memory_order ord)
128 {
129 uint32_t value;
130
131 if (_os_atomic_mo_has_acquire(ord)) {
132 value = __builtin_arm_ldaex(target); // ldaxr
133 } else {
134 value = __builtin_arm_ldrex(target); // ldxr
135 }
136
137 return value;
138 }
139
140 boolean_t
141 store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
142 {
143 boolean_t err;
144
145 if (_os_atomic_mo_has_release(ord)) {
146 err = __builtin_arm_stlex(value, target); // stlxr
147 } else {
148 err = __builtin_arm_strex(value, target); // stxr
149 }
150
151 return !err;
152 }
153
154 uint32_t
155 atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
156 {
157 uint32_t val;
158
159 #if !OS_ATOMIC_USE_LLSC
160 ord = memory_order_relaxed;
161 #endif
162 val = load_exclusive32(target, ord);
163 *previous = val;
164 return val;
165 }
166
167 boolean_t
168 atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
169 {
170 #if !OS_ATOMIC_USE_LLSC
171 return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
172 #else
173 (void)previous; // Previous not needed, monitor is held
174 return store_exclusive32(target, newval, ord);
175 #endif
176 }
177
178 void
179 atomic_exchange_abort(void)
180 {
181 os_atomic_clear_exclusive();
182 }
183
184 boolean_t
185 atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
186 {
187 uint32_t value, prev;
188
189 for (;;) {
190 value = atomic_exchange_begin32(target, &prev, ord);
191 if (value & test_mask) {
192 if (wait) {
193 wait_for_event(); // Wait with monitor held
194 } else {
195 atomic_exchange_abort(); // Clear exclusive monitor
196 }
197 return FALSE;
198 }
199 value |= set_mask;
200 if (atomic_exchange_complete32(target, prev, value, ord)) {
201 return TRUE;
202 }
203 }
204 }
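/*
 * Illustrative sketch (not compiled): how a spin-acquire path might use
 * atomic_test_and_set32() with wait=TRUE. "lock_word" and LOCK_HELD_BIT are
 * hypothetical; the real hw_lock implementations live elsewhere.
 *
 *	while (!atomic_test_and_set32(&lock_word, LOCK_HELD_BIT, LOCK_HELD_BIT,
 *	    memory_order_acquire, TRUE)) {
 *		// FALSE with wait=TRUE means we parked in wait_for_event()
 *		// (WFE with the monitor armed) until the owner stored to the
 *		// lock word; simply retry.
 *	}
 */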
205
206 #pragma GCC visibility pop
207 #pragma mark preemption
208
209 /*
210 * This function checks whether an AST_URGENT has been pended.
211 *
212 * It is called once the preemption has been reenabled, which means the thread
213 * may have been preempted right before this was called, and when this function
214 * actually performs the check, we've changed CPU.
215 *
216 * This race is however benign: the point of AST_URGENT is to trigger a context
217 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
218 * was cleared in the process.
219 *
220 * It follows that this check cannot have false negatives, which allows us
221 * to avoid fiddling with interrupt state for the vast majority of cases
222 * when the check will actually be negative.
223 */
224 static NOINLINE void
225 kernel_preempt_check(void)
226 {
227 uint64_t state;
228
229 /* If interrupts are masked, we can't take an AST here */
230 state = __builtin_arm_rsr64("DAIF");
231 if (state & DAIF_IRQF) {
232 return;
233 }
234
235 /* disable interrupts (IRQ FIQ ASYNCF) */
236 __builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);
237
238 /*
239 * Reload cpu_pending_ast: a context switch would cause it to change.
240 * Now that interrupts are disabled, this will debounce false positives.
241 */
242 if (current_thread()->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
243 ast_taken_kernel();
244 }
245
246 /* restore the original interrupt mask */
247 __builtin_arm_wsr64("DAIF", state);
248 }
249
250 static inline void
251 _enable_preemption_write_count(thread_t thread, unsigned int count)
252 {
253 os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);
254
255 /*
256 * This check is racy and could load from another CPU's pending_ast mask,
257 * but as described above, this can't have false negatives.
258 */
259 if (count == 0) {
260 if (__improbable(thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT)) {
261 return kernel_preempt_check();
262 }
263 }
264 }
265
266 #if SCHED_HYGIENE_DEBUG
267
268 uint64_t _Atomic PERCPU_DATA_HACK_78750602(preemption_disable_max_mt);
269
270 #if XNU_PLATFORM_iPhoneOS
271 #define DEFAULT_PREEMPTION_TIMEOUT 120000 /* 5ms */
272 #define DEFAULT_PREEMPTION_MODE SCHED_HYGIENE_MODE_PANIC
273 #else
274 #define DEFAULT_PREEMPTION_TIMEOUT 0 /* Disabled */
275 #define DEFAULT_PREEMPTION_MODE SCHED_HYGIENE_MODE_OFF
276 #endif /* XNU_PLATFORM_iPhoneOS */
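/*
 * Worked check of the iPhoneOS default above, assuming the 24MHz ARM
 * timebase mentioned earlier in this file: 120000 ticks / 24000 ticks per
 * millisecond = 5ms, matching the comment on DEFAULT_PREEMPTION_TIMEOUT.
 */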
277
278 MACHINE_TIMEOUT_DEV_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption",
279 DEFAULT_PREEMPTION_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);
280 TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
281 "machine-timeouts",
282 "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
283 "sched_preemption_disable_debug_mode",
284 DEFAULT_PREEMPTION_MODE,
285 TUNABLE_DT_CHECK_CHOSEN);
286
287 static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;
288
289 NOINLINE void
290 _prepare_preemption_disable_measurement(void)
291 {
292 thread_t thread = current_thread();
293
294 if (thread->machine.inthandler_timestamp == 0) {
295 /*
296 * Only prepare a measurement if not currently in an interrupt
297 * handler.
298 *
299 * We are only interested in the net duration of disabled
300 * preemption, that is: The time in which preemption was
301 * disabled, minus the intervals in which any (likely
302 * unrelated) interrupts were handled.
303 * ml_adjust_preemption_disable_time() will remove those
304 * intervals, however we also do not even start measuring
305 * preemption disablement if we are already within handling of
306 * an interrupt when preemption was disabled (the resulting
307 * net time would be 0).
308 *
309 * Interrupt handling duration is handled separately, and any
310 * long intervals of preemption disablement are counted
311 * towards that.
312 */
313
314 bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
315 thread->machine.preemption_disable_abandon = false;
316 thread->machine.preemption_disable_mt = ml_get_sched_hygiene_timebase();
317 thread->machine.preemption_disable_adjust = 0;
318 thread->machine.preemption_count |= SCHED_HYGIENE_MARKER;
319 #if MONOTONIC
320 if (sched_hygiene_debug_pmc) {
321 mt_cur_cpu_cycles_instrs_speculative(&thread->machine.preemption_disable_cycles, &thread->machine.preemption_disable_instr);
322 }
323 #endif
324 ml_set_interrupts_enabled_with_debug(istate, false);
325 }
326 }
327
328 NOINLINE void
329 _collect_preemption_disable_measurement(void)
330 {
331 bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
332 /*
333 * Collect start time and current time with interrupts disabled.
334 * Otherwise an interrupt coming in after grabbing the timestamp
335 * could spuriously inflate the measurement, because it will
336 * adjust preemption_disable_mt only after we already grabbed
337 * it.
338 *
339 * (Even worse if we collected the current time first: Then a
340 * subsequent interrupt could adjust preemption_disable_mt to
341 * make the duration go negative after subtracting the already
342 * grabbed time. With interrupts disabled we don't care much about
343 * the order.)
344 */
345
346 thread_t thread = current_thread();
347 uint64_t const mt = thread->machine.preemption_disable_mt;
348 uint64_t const adjust = thread->machine.preemption_disable_adjust;
349 uint64_t const now = ml_get_sched_hygiene_timebase();
350 thread->machine.preemption_disable_mt = 0;
351 thread->machine.preemption_disable_adjust = 0;
352 /* no need to clear SCHED_HYGIENE_MARKER, will be done on exit */
353
354 /*
355 * Don't need to reset (or even save) preemption_disable_abandon
356 * here: abandon_preemption_disable_measurement is a no-op anyway
357 * if preemption_disable_mt == 0 (which we just set), and it
358 * will stay that way until the next call to
359 * _collect_preemption_disable_measurement.
360 */
361
362 os_compiler_barrier(acq_rel);
363
364 ml_set_interrupts_enabled_with_debug(istate, false);
365
366 /*
367 * Fine to get with interrupts enabled:
368 * Above we set preemption_disable_mt to 0, which turns
369 * abandon_preemption_disable_measurement() into a no-op
370 * until the next collection starts.
371 */
372 if (thread->machine.preemption_disable_abandon) {
373 goto out;
374 }
375
376 int64_t const gross_duration = now - mt;
377 int64_t const net_duration = gross_duration - adjust;
378
379 uint64_t _Atomic * const max_duration = PERCPU_GET(preemption_disable_max_mt);
380
381 if (__improbable(net_duration > *max_duration)) {
382 os_atomic_store(max_duration, net_duration, relaxed);
383 }
384
385 uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
386 if (__improbable(threshold > 0 && net_duration >= threshold)) {
387 uint64_t average_freq = 0;
388 uint64_t average_cpi_whole = 0;
389 uint64_t average_cpi_fractional = 0;
390
391 #if MONOTONIC
392 if (sched_hygiene_debug_pmc) {
393 uint64_t current_cycles = 0, current_instrs = 0;
394
395 /*
396 * We're getting these values a bit late, but getting them
397 * is a bit expensive, so we take the slight hit in
398 * accuracy for the reported values (which aren't very
399 * stable anyway).
400 */
401 istate = ml_set_interrupts_enabled_with_debug(false, false);
402 mt_cur_cpu_cycles_instrs_speculative(&current_cycles, &current_instrs);
403 ml_set_interrupts_enabled_with_debug(istate, false);
404
405 uint64_t duration_ns;
406 absolutetime_to_nanoseconds(gross_duration, &duration_ns);
407
408 average_freq = (current_cycles - thread->machine.preemption_disable_cycles) / (duration_ns / 1000);
409 average_cpi_whole = (current_cycles - thread->machine.preemption_disable_cycles) / (current_instrs - thread->machine.preemption_disable_instr);
410 average_cpi_fractional =
411 (((current_cycles - thread->machine.preemption_disable_cycles) * 100) / (current_instrs - thread->machine.preemption_disable_instr)) % 100;
412 }
413 #endif
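/*
 * Worked example with hypothetical numbers: 24,000,000 cycles and
 * 12,000,000 instructions over a 10,000,000ns window give
 * freq = 24000000 / (10000000 / 1000) = 2400 MHz and
 * CPI = 24000000 / 12000000 = 2 whole, ((24000000 * 100) / 12000000) % 100 = 0
 * fractional, reported below as "CPI = 2.0".
 */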
414
415 if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
416 panic("preemption disable timeout exceeded: %llu >= %llu mt ticks (start: %llu, now: %llu, gross: %llu, inttime: %llu), "
417 "freq = %llu MHz, CPI = %llu.%llu",
418 net_duration, threshold, mt, now, gross_duration, adjust,
419 average_freq, average_cpi_whole, average_cpi_fractional);
420 }
421
422 DTRACE_SCHED4(mach_preemption_expired, uint64_t, net_duration, uint64_t, gross_duration,
423 uint64_t, average_cpi_whole, uint64_t, average_cpi_fractional);
424 if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
425 KDBG(sched_preemption_disable_debug_dbgid, net_duration, gross_duration, average_cpi_whole, average_cpi_fractional);
426 }
427 }
428
429 out:
430 /*
431 * The preemption count is SCHED_HYGIENE_MARKER; we need to clear it.
432 */
433 _enable_preemption_write_count(thread, 0);
434 }
435
436 /*
437 * Abandon a potential preemption disable measurement. Useful for
438 * example for the idle thread, which would just spuriously
439 * trigger the threshold while actually idling, which we don't
440 * care about.
441 */
442 void
443 abandon_preemption_disable_measurement(void)
444 {
445 thread_t t = current_thread();
446 bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
447
448 if (t->machine.preemption_disable_mt != 0) {
449 t->machine.preemption_disable_abandon = true;
450 }
451 ml_set_interrupts_enabled_with_debug(istate, false);
452 }
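/*
 * Illustrative sketch (not compiled): a path that legitimately sits with
 * preemption disabled for a long time, such as an idle loop, can opt out of
 * the measurement it would otherwise trip. "nothing_to_do" and
 * "wait_for_next_event" are hypothetical placeholders.
 *
 *	disable_preemption();
 *	while (nothing_to_do()) {
 *		abandon_preemption_disable_measurement();
 *		wait_for_next_event();
 *	}
 *	enable_preemption();
 */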
453
454 /*
455 * Skip predicate for sched_preemption_disable, which would trigger
456 * spuriously when kprintf spam is enabled.
457 */
458 bool
459 kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
460 {
461 bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
462 return kprintf_spam_enabled;
463 }
464
465 /*
466 * Abandon function exported for AppleCLPC, as a workaround to rdar://91668370.
467 *
468 * Only for AppleCLPC!
469 */
470 void
471 sched_perfcontrol_abandon_preemption_disable_measurement(void)
472 {
473 abandon_preemption_disable_measurement();
474 }
475
476 #else /* SCHED_HYGIENE_DEBUG */
477 void
478 sched_perfcontrol_abandon_preemption_disable_measurement(void)
479 {
480 // No-op. Function is exported, so needs to be defined
481 }
482 #endif /* SCHED_HYGIENE_DEBUG */
483
484 /*
485 * This function is written in a way that the codegen is extremely short.
486 *
487 * LTO isn't smart enough to inline it, yet it is profitable because
488 * the vast majority of callers use current_thread() already.
489 *
490 * TODO: It is unfortunate that we have to load
491 * sched_preemption_disable_debug_mode
492 *
493 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
494 */
495 __attribute__((always_inline))
496 void
497 _disable_preemption(void)
498 {
499 thread_t thread = current_thread();
500 unsigned int count = thread->machine.preemption_count;
501
502 os_atomic_store(&thread->machine.preemption_count,
503 count + 1, compiler_acq_rel);
504
505 #if SCHED_HYGIENE_DEBUG
506 /*
507 * Note that this is not the only place preemption gets disabled,
508 * it also gets modified on ISR and PPL entry/exit. Both of those
509 * events will be treated specially however, and
510 * increment/decrement being paired around their entry/exit means
511 * that collection here is not desynced otherwise.
512 */
513
514 if (__improbable(count == 0 && sched_preemption_disable_debug_mode)) {
515 __attribute__((musttail))
516 return _prepare_preemption_disable_measurement();
517 }
518 #endif /* SCHED_HYGIENE_DEBUG */
519 }
520
521
522 /*
523 * This variant of disable_preemption() allows disabling preemption
524 * without taking measurements (and later potentially triggering
525 * actions on those).
526 */
527 __attribute__((always_inline))
528 void
529 _disable_preemption_without_measurements(void)
530 {
531 thread_t thread = current_thread();
532 unsigned int count = thread->machine.preemption_count;
533
534 #if SCHED_HYGIENE_DEBUG
535 /*
536 * Inform _collect_preemption_disable_measurement()
537 * that we didn't really care.
538 */
539 thread->machine.preemption_disable_abandon = true;
540 #endif
541
542 os_atomic_store(&thread->machine.preemption_count,
543 count + 1, compiler_acq_rel);
544 }
545
546 /*
547 * To help _enable_preemption() inline everywhere with LTO,
548 * we keep these nice non inlineable functions as the panic()
549 * codegen setup is quite large and for weird reasons causes a frame.
550 */
551 __abortlike
552 static void
553 _enable_preemption_underflow(void)
554 {
555 panic("Preemption count underflow");
556 }
557
558 /*
559 * This function is written in a way that the codegen is extremely short.
560 *
561 * LTO isn't smart enough to inline it, yet it is profitable because
562 * the vast majority of callers use current_thread() already.
563 *
564 * The SCHED_HYGIENE_MARKER trick is used so that we do not have to load
565 * unrelated fields of current_thread().
566 *
567 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
568 */
569 __attribute__((always_inline))
570 void
571 _enable_preemption(void)
572 {
573 thread_t thread = current_thread();
574 unsigned int count = thread->machine.preemption_count;
575
576 if (__improbable(count == 0)) {
577 _enable_preemption_underflow();
578 }
579
580 #if SCHED_HYGIENE_DEBUG
581 if (__improbable(count == SCHED_HYGIENE_MARKER + 1)) {
582 return _collect_preemption_disable_measurement();
583 }
584 #endif /* SCHED_HYGIENE_DEBUG */
585
586 _enable_preemption_write_count(thread, count - 1);
587 }
588
589 __attribute__((always_inline))
590 unsigned int
591 get_preemption_level_for_thread(thread_t thread)
592 {
593 unsigned int count = thread->machine.preemption_count;
594
595 #if SCHED_HYGIENE_DEBUG
596 /*
597 * Hide this "flag" from callers;
598 * leaving it in would make the count look negative anyway,
599 * which some people dislike.
600 */
601 count &= ~SCHED_HYGIENE_MARKER;
602 #endif
603 return (int)count;
604 }
605
606 __attribute__((always_inline))
607 int
608 get_preemption_level(void)
609 {
610 return get_preemption_level_for_thread(current_thread());
611 }
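/*
 * Illustrative sketch (not compiled): preemption disable/enable nests, so
 * only the outermost enable drops the count to zero and runs the pending
 * AST_URGENT check in _enable_preemption_write_count().
 *
 *	disable_preemption();          // count 0 -> 1
 *	disable_preemption();          // count 1 -> 2 (nested)
 *	...                            // preemption-free critical section
 *	enable_preemption();           // count 2 -> 1
 *	enable_preemption();           // count 1 -> 0, may take a pending AST
 */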
612
613 #if CONFIG_PV_TICKET
614 __startup_func
615 void
616 lck_init_pv(void)
617 {
618 uint32_t pvtck = 1;
619 PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
620 if (pvtck == 0) {
621 return;
622 }
623 has_lock_pv = hvg_is_hcall_available(HVG_HCALL_VCPU_WFK) &&
624 hvg_is_hcall_available(HVG_HCALL_VCPU_KICK);
625 }
626 STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
627 #endif
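/*
 * Illustrative note on the tunable above: booting with "pvticket=0" skips
 * the hypercall probe entirely (leaving has_lock_pv at its build-time
 * default), while the default or any non-zero value lets availability of
 * HVG_HCALL_VCPU_WFK/HVG_HCALL_VCPU_KICK decide whether paravirtualized
 * ticket-lock waits and kicks are used.
 */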
628
629
630 #pragma mark lck_spin_t
631 #if LCK_SPIN_IS_TICKET_LOCK
632
633 lck_spin_t *
634 lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
635 {
636 lck_spin_t *lck;
637
638 lck = zalloc(KT_LCK_SPIN);
639 lck_spin_init(lck, grp, attr);
640 return lck;
641 }
642
643 void
644 lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
645 {
646 lck_spin_destroy(lck, grp);
647 zfree(KT_LCK_SPIN, lck);
648 }
649
650 void
651 lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
652 {
653 lck_ticket_init(lck, grp);
654 }
655
656 /*
657 * arm_usimple_lock is a lck_spin_t without a group or attributes
658 */
659 MARK_AS_HIBERNATE_TEXT void inline
660 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
661 {
662 lck_ticket_init((lck_ticket_t *)lck, LCK_GRP_NULL);
663 }
664
665 void
666 lck_spin_assert(const lck_spin_t *lock, unsigned int type)
667 {
668 if (type == LCK_ASSERT_OWNED) {
669 lck_ticket_assert_owned(lock);
670 } else if (type == LCK_ASSERT_NOTOWNED) {
671 lck_ticket_assert_not_owned(lock);
672 } else {
673 panic("lck_spin_assert(): invalid arg (%u)", type);
674 }
675 }
676
677 void
678 lck_spin_lock(lck_spin_t *lock)
679 {
680 lck_ticket_lock(lock, LCK_GRP_NULL);
681 }
682
683 void
684 lck_spin_lock_nopreempt(lck_spin_t *lock)
685 {
686 lck_ticket_lock_nopreempt(lock, LCK_GRP_NULL);
687 }
688
689 int
690 lck_spin_try_lock(lck_spin_t *lock)
691 {
692 return lck_ticket_lock_try(lock, LCK_GRP_NULL);
693 }
694
695 int
696 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
697 {
698 return lck_ticket_lock_try_nopreempt(lock, LCK_GRP_NULL);
699 }
700
701 void
702 lck_spin_unlock(lck_spin_t *lock)
703 {
704 lck_ticket_unlock(lock);
705 }
706
707 void
708 lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
709 {
710 lck_ticket_destroy(lck, grp);
711 }
712
713 /*
714 * those really should be in an alias file instead,
715 * but you can't make that conditional.
716 *
717 * it will be good enough for perf evals for now
718 *
719 * we also can't make aliases for symbols that
720 * are in alias files like lck_spin_init and friends,
721 * so this suffers double jump penalties for kexts
722 * (LTO does the right thing for XNU).
723 */
724 #define make_alias(a, b) asm(".globl _" #a "\n" ".set _" #a ", _" #b "\n")
725 make_alias(lck_spin_lock_grp, lck_ticket_lock);
726 make_alias(lck_spin_lock_nopreempt_grp, lck_ticket_lock_nopreempt);
727 make_alias(lck_spin_try_lock_grp, lck_ticket_lock_try);
728 make_alias(lck_spin_try_lock_nopreempt_grp, lck_ticket_lock_try_nopreempt);
729 make_alias(lck_spin_unlock_nopreempt, lck_ticket_unlock_nopreempt);
730 make_alias(kdp_lck_spin_is_acquired, kdp_lck_ticket_is_acquired);
731 #undef make_alias
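/*
 * For reference, the expansion of one of the aliases above:
 *
 *	make_alias(lck_spin_lock_grp, lck_ticket_lock)
 * becomes
 *	asm(".globl _lck_spin_lock_grp\n" ".set _lck_spin_lock_grp, _lck_ticket_lock\n");
 *
 * i.e. the spin lock entry point is an assembler-level alias of the ticket
 * lock implementation, with no C wrapper in between.
 */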
732
733 #else /* !LCK_SPIN_IS_TICKET_LOCK */
734
735 #if DEVELOPMENT || DEBUG
736 __abortlike
737 static void
738 __lck_spin_invalid_panic(lck_spin_t *lck)
739 {
740 const char *how = "Invalid";
741
742 if (lck->type == LCK_SPIN_TYPE_DESTROYED ||
743 lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
744 how = "Destroyed";
745 }
746
747 panic("%s spinlock %p: <0x%016lx 0x%016lx>",
748 how, lck, lck->lck_spin_data, lck->type);
749 }
750
751 static inline void
752 lck_spin_verify(lck_spin_t *lck)
753 {
754 if (lck->type != LCK_SPIN_TYPE ||
755 lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
756 __lck_spin_invalid_panic(lck);
757 }
758 }
759 #else /* DEVELOPMENT || DEBUG */
760 #define lck_spin_verify(lck) ((void)0)
761 #endif /* DEVELOPMENT || DEBUG */
762
763 lck_spin_t *
764 lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
765 {
766 lck_spin_t *lck;
767
768 lck = zalloc(KT_LCK_SPIN);
769 lck_spin_init(lck, grp, attr);
770 return lck;
771 }
772
773 void
774 lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
775 {
776 lck_spin_destroy(lck, grp);
777 zfree(KT_LCK_SPIN, lck);
778 }
779
780 void
781 lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
782 {
783 lck->type = LCK_SPIN_TYPE;
784 hw_lock_init(&lck->hwlock);
785 if (grp) {
786 lck_grp_reference(grp, &grp->lck_grp_spincnt);
787 }
788 }
789
790 /*
791 * arm_usimple_lock is a lck_spin_t without a group or attributes
792 */
793 MARK_AS_HIBERNATE_TEXT void inline
794 arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
795 {
796 lck->type = LCK_SPIN_TYPE;
797 hw_lock_init(&lck->hwlock);
798 }
799
800 void
801 lck_spin_assert(const lck_spin_t *lock, unsigned int type)
802 {
803 thread_t thread, holder;
804
805 if (lock->type != LCK_SPIN_TYPE) {
806 panic("Invalid spinlock %p", lock);
807 }
808
809 holder = HW_LOCK_STATE_TO_THREAD(lock->lck_spin_data);
810 thread = current_thread();
811 if (type == LCK_ASSERT_OWNED) {
812 if (holder == 0) {
813 panic("Lock not owned %p = %p", lock, holder);
814 }
815 if (holder != thread) {
816 panic("Lock not owned by current thread %p = %p", lock, holder);
817 }
818 } else if (type == LCK_ASSERT_NOTOWNED) {
819 if (holder != THREAD_NULL && holder == thread) {
820 panic("Lock owned by current thread %p = %p", lock, holder);
821 }
822 } else {
823 panic("lck_spin_assert(): invalid arg (%u)", type);
824 }
825 }
826
827 void
828 lck_spin_lock(lck_spin_t *lock)
829 {
830 lck_spin_verify(lock);
831 hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
832 }
833
834 void
835 lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
836 {
837 #pragma unused(grp)
838 lck_spin_verify(lock);
839 hw_lock_lock(&lock->hwlock, grp);
840 }
841
842 void
843 lck_spin_lock_nopreempt(lck_spin_t *lock)
844 {
845 lck_spin_verify(lock);
846 hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
847 }
848
849 void
850 lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
851 {
852 #pragma unused(grp)
853 lck_spin_verify(lock);
854 hw_lock_lock_nopreempt(&lock->hwlock, grp);
855 }
856
857 int
858 lck_spin_try_lock(lck_spin_t *lock)
859 {
860 lck_spin_verify(lock);
861 return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
862 }
863
864 int
865 lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
866 {
867 #pragma unused(grp)
868 lck_spin_verify(lock);
869 return hw_lock_try(&lock->hwlock, grp);
870 }
871
872 int
873 lck_spin_try_lock_nopreempt(lck_spin_t *lock)
874 {
875 lck_spin_verify(lock);
876 return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
877 }
878
879 int
880 lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
881 {
882 #pragma unused(grp)
883 lck_spin_verify(lock);
884 return hw_lock_try_nopreempt(&lock->hwlock, grp);
885 }
886
887 void
888 lck_spin_unlock(lck_spin_t *lock)
889 {
890 lck_spin_verify(lock);
891 hw_lock_unlock(&lock->hwlock);
892 }
893
894 void
895 lck_spin_unlock_nopreempt(lck_spin_t *lock)
896 {
897 lck_spin_verify(lock);
898 hw_lock_unlock_nopreempt(&lock->hwlock);
899 }
900
901 void
902 lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
903 {
904 lck_spin_verify(lck);
905 *lck = (lck_spin_t){
906 .lck_spin_data = LCK_SPIN_TAG_DESTROYED,
907 .type = LCK_SPIN_TYPE_DESTROYED,
908 };
909 if (grp) {
910 lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
911 }
912 }
913
914 /*
915 * Routine: kdp_lck_spin_is_acquired
916 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
917 */
918 boolean_t
919 kdp_lck_spin_is_acquired(lck_spin_t *lck)
920 {
921 if (not_in_kdp) {
922 panic("panic: spinlock acquired check done outside of kernel debugger");
923 }
924 return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
925 }
926
927 #endif /* !LCK_SPIN_IS_TICKET_LOCK */
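/*
 * Illustrative sketch (not compiled): typical lifetime of a dynamically
 * allocated spin lock using the API above, regardless of which
 * implementation (ticket or hw_lock based) was selected. "my_grp" and
 * "my_counter" are hypothetical.
 *
 *	lck_spin_t *lock = lck_spin_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_spin_lock(lock);            // returns with preemption disabled
 *	my_counter++;                   // short, non-blocking critical section
 *	lck_spin_unlock(lock);
 *
 *	lck_spin_free(lock, my_grp);    // destroy and return memory to KT_LCK_SPIN
 */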
928
929 /*
930 * Initialize a usimple_lock.
931 *
932 * No change in preemption state.
933 */
934 void
935 usimple_lock_init(
936 usimple_lock_t l,
937 unsigned short tag)
938 {
939 simple_lock_init((simple_lock_t) l, tag);
940 }
941
942
943 /*
944 * Acquire a usimple_lock.
945 *
946 * Returns with preemption disabled. Note
947 * that the hw_lock routines are responsible for
948 * maintaining preemption state.
949 */
950 void
951 (usimple_lock)(
952 usimple_lock_t l
953 LCK_GRP_ARG(lck_grp_t *grp))
954 {
955 simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
956 }
957
958
959 /*
960 * Release a usimple_lock.
961 *
962 * Returns with preemption enabled. Note
963 * that the hw_lock routines are responsible for
964 * maintaining preemption state.
965 */
966 void
967 (usimple_unlock)(
968 usimple_lock_t l)
969 {
970 simple_unlock((simple_lock_t)l);
971 }
972
973
974 /*
975 * Conditionally acquire a usimple_lock.
976 *
977 * On success, returns with preemption disabled.
978 * On failure, returns with preemption in the same state
979 * as when first invoked. Note that the hw_lock routines
980 * are responsible for maintaining preemption state.
981 *
982 * XXX No stats are gathered on a miss; I preserved this
983 * behavior from the original assembly-language code, but
984 * doesn't it make sense to log misses? XXX
985 */
986 unsigned
987 int
988 (usimple_lock_try)(
989 usimple_lock_t l
990 LCK_GRP_ARG(lck_grp_t *grp))
991 {
992 return simple_lock_try((simple_lock_t) l, grp);
993 }
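/*
 * Illustrative sketch (not compiled): the conditional-acquire pattern
 * described above. "l" and "grp" are hypothetical.
 *
 *	if (usimple_lock_try(l, grp)) {
 *		// lock held, preemption disabled
 *		usimple_unlock(l);
 *	} else {
 *		// lock was busy; preemption state unchanged
 *	}
 */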
994