/*
 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
 * Mellon University All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or [email protected]
 *  School of Computer Science Carnegie Mellon University Pittsburgh PA
 *  15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 * File:	kern/lock.c
 * Author:	Avadis Tevanian, Jr., Michael Wayne Young
 * Date:	1985
 *
 * Locking primitives implementation
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <mach/machine/sdt.h>

#include <kern/locks_internal.h>
#include <kern/zalloc.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_hygiene.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <kern/percpu.h>
#include <kern/hvg_hypercall.h>
#include <string.h>
#include <arm/cpu_internal.h>
#include <os/hash.h>
#include <arm/cpu_data.h>

#include <arm/cpu_data_internal.h>
#include <arm64/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <pexpert/pexpert.h>

#include <sys/kdebug.h>

#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness.
// These panics are undesirable when already panicking or when a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

/* Forwards */

extern unsigned int not_in_kdp;

MACHINE_TIMEOUT(lock_panic_timeout, "lock-panic",
    0xc00000 /* 12.5 m ticks ~= 524ms with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

#define NOINLINE __attribute__((noinline))

#define interrupts_disabled(mask) (mask & DAIF_IRQF)

KALLOC_TYPE_DEFINE(KT_LCK_SPIN, lck_spin_t, KT_PRIV_ACCT);

#pragma GCC visibility push(hidden)
/*
 * The atomic exchange API is a low-level abstraction of the operations
 * to atomically read, modify, and write a pointer. This abstraction works
 * for both the Intel and ARMv8.1 compare-and-exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
uint32_t
load_exclusive32(uint32_t *target, enum memory_order ord)
{
	uint32_t value;

	if (_os_atomic_mo_has_acquire(ord)) {
		value = __builtin_arm_ldaex(target);    // ldaxr
	} else {
		value = __builtin_arm_ldrex(target);    // ldxr
	}

	return value;
}

boolean_t
store_exclusive32(uint32_t *target, uint32_t value, enum memory_order ord)
{
	boolean_t err;

	if (_os_atomic_mo_has_release(ord)) {
		err = __builtin_arm_stlex(value, target);       // stlxr
	} else {
		err = __builtin_arm_strex(value, target);       // stxr
	}

	return !err;
}

uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t val;

#if !OS_ATOMIC_USE_LLSC
	ord = memory_order_relaxed;
#endif
	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
#if !OS_ATOMIC_USE_LLSC
	return __c11_atomic_compare_exchange_strong((_Atomic uint32_t *)target, &previous, newval, ord, memory_order_relaxed);
#else
	(void)previous;         // Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
#endif
}

void
atomic_exchange_abort(void)
{
	os_atomic_clear_exclusive();
}
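
/*
 * Illustrative sketch (not part of the API surface): callers of the exchange
 * primitives above typically follow a retry-loop pattern of the form below,
 * where cannot_proceed() and SOME_BIT are hypothetical placeholders:
 *
 *	uint32_t old, new;
 *	for (;;) {
 *		new = atomic_exchange_begin32(target, &old, memory_order_acquire);
 *		if (cannot_proceed(new)) {
 *			atomic_exchange_abort();        // drop the exclusive monitor
 *			break;
 *		}
 *		new |= SOME_BIT;
 *		if (atomic_exchange_complete32(target, old, new, memory_order_acquire)) {
 *			break;
 *		}
 *	}
 *
 * atomic_test_and_set32() below is the in-tree instance of this pattern.
 */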

boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t value, prev;

	for (;;) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait) {
				wait_for_event();       // Wait with monitor held
			} else {
				atomic_exchange_abort();        // Clear exclusive monitor
			}
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord)) {
			return TRUE;
		}
	}
}

#pragma GCC visibility pop
#pragma mark preemption

/*
 * This function checks whether an AST_URGENT has been pended.
 *
 * It is called once preemption has been reenabled, which means the thread
 * may have been preempted right before this was called, and by the time this
 * function actually performs the check, we may have changed CPUs.
 *
 * This race is however benign: the point of AST_URGENT is to trigger a context
 * switch, so if one happened, there's nothing left to check for, and AST_URGENT
 * was cleared in the process.
 *
 * It follows that this check cannot have false negatives, which allows us
 * to avoid fiddling with interrupt state for the vast majority of cases
 * when the check will actually be negative.
 */
static NOINLINE void
kernel_preempt_check(void)
{
	uint64_t state;

	/* If interrupts are masked, we can't take an AST here */
	state = __builtin_arm_rsr64("DAIF");
	if (state & DAIF_IRQF) {
		return;
	}

	/* disable interrupts (IRQ FIQ ASYNCF) */
	__builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);

	/*
	 * Reload cpu_pending_ast: a context switch would cause it to change.
	 * Now that interrupts are disabled, this will debounce false positives.
	 */
	if (current_thread()->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
		ast_taken_kernel();
	}

	/* restore the original interrupt mask */
	__builtin_arm_wsr64("DAIF", state);
}

static inline void
_enable_preemption_write_count(thread_t thread, unsigned int count)
{
	os_atomic_store(&thread->machine.preemption_count, count, compiler_acq_rel);

	/*
	 * This check is racy and could load from another CPU's pending_ast mask,
	 * but as described above, this can't have false negatives.
	 */
	if (count == 0) {
		if (__improbable(thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT)) {
			return kernel_preempt_check();
		}
	}
}

#if SCHED_HYGIENE_DEBUG

uint64_t _Atomic PERCPU_DATA_HACK_78750602(preemption_disable_max_mt);

MACHINE_TIMEOUT_DEV_WRITEABLE(sched_preemption_disable_threshold_mt, "sched-preemption", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, kprintf_spam_mt_pred);

TUNABLE_DT_WRITEABLE(sched_hygiene_mode_t, sched_preemption_disable_debug_mode,
    "machine-timeouts",
    "sched-preemption-disable-mode", /* DT property names have to be 31 chars max */
    "sched_preemption_disable_debug_mode",
    SCHED_HYGIENE_MODE_OFF,
    TUNABLE_DT_CHECK_CHOSEN);

static uint32_t const sched_preemption_disable_debug_dbgid = MACHDBG_CODE(DBG_MACH_SCHED, MACH_PREEMPTION_EXPIRED) | DBG_FUNC_NONE;

NOINLINE void
_prepare_preemption_disable_measurement(void)
{
	thread_t thread = current_thread();

	if (thread->machine.inthandler_timestamp == 0) {
		/*
		 * Only prepare a measurement if not currently in an interrupt
		 * handler.
		 *
		 * We are only interested in the net duration of disabled
		 * preemption, that is: The time in which preemption was
		 * disabled, minus the intervals in which any (likely
		 * unrelated) interrupts were handled.
		 * ml_adjust_preemption_disable_time() will remove those
		 * intervals, however we also do not even start measuring
		 * preemption disablement if we are already within handling of
		 * an interrupt when preemption was disabled (the resulting
		 * net time would be 0).
		 *
		 * Interrupt handling duration is handled separately, and any
		 * long intervals of preemption disablement are counted
		 * towards that.
		 */

		bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
		thread->machine.preemption_disable_abandon = false;
		thread->machine.preemption_disable_mt = ml_get_sched_hygiene_timebase();
		thread->machine.preemption_disable_adjust = 0;
		thread->machine.preemption_count |= SCHED_HYGIENE_MARKER;
#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			mt_cur_cpu_cycles_instrs_speculative(&thread->machine.preemption_disable_cycles, &thread->machine.preemption_disable_instr);
		}
#endif
		ml_set_interrupts_enabled_with_debug(istate, false);
	}
}
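
/*
 * Worked illustration of the bookkeeping above (hypothetical numbers): if
 * preemption was disabled at mt = 1000 timebase ticks, re-enabled at
 * now = 9000, and interrupt handlers contributed adjust = 3000 ticks in
 * between, then the collection below computes
 * gross_duration = now - mt = 8000 and
 * net_duration = gross_duration - adjust = 5000, and only net_duration is
 * compared against the sched-preemption threshold.
 */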

NOINLINE void
_collect_preemption_disable_measurement(void)
{
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp
	/*
	 * Collect start time and current time with interrupts disabled.
	 * Otherwise an interrupt coming in after grabbing the timestamp
	 * could spuriously inflate the measurement, because it will
	 * adjust preemption_disable_mt only after we already grabbed
	 * it.
	 *
	 * (Even worse if we collected the current time first: Then a
	 * subsequent interrupt could adjust preemption_disable_mt to
	 * make the duration go negative after subtracting the already
	 * grabbed time. With interrupts disabled we don't care much about
	 * the order.)
	 */

	thread_t thread = current_thread();
	uint64_t const mt = thread->machine.preemption_disable_mt;
	uint64_t const adjust = thread->machine.preemption_disable_adjust;
	uint64_t const now = ml_get_sched_hygiene_timebase();
	thread->machine.preemption_disable_mt = 0;
	thread->machine.preemption_disable_adjust = 0;
	/* no need to clear SCHED_HYGIENE_MARKER, will be done on exit */

	/*
	 * Don't need to reset (or even save) preemption_disable_abandon
	 * here: abandon_preemption_disable_measurement is a no-op anyway
	 * if preemption_disable_mt == 0 (which we just set), and it
	 * will stay that way until the next call to
	 * _collect_preemption_disable_measurement.
	 */

	os_compiler_barrier(acq_rel);

	ml_set_interrupts_enabled_with_debug(istate, false);

	/*
	 * Fine to get with interrupts enabled:
	 * Above we set preemption_disable_mt to 0, which turns
	 * abandon_preemption_disable_measurement() into a no-op
	 * until the next collection starts.
	 */
	if (thread->machine.preemption_disable_abandon) {
		goto out;
	}

	int64_t const gross_duration = now - mt;
	int64_t const net_duration = gross_duration - adjust;

	uint64_t _Atomic * const max_duration = PERCPU_GET(preemption_disable_max_mt);

	if (__improbable(net_duration > *max_duration)) {
		os_atomic_store(max_duration, net_duration, relaxed);
	}

	uint64_t const threshold = os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed);
	if (__improbable(threshold > 0 && net_duration >= threshold)) {
		uint64_t average_freq = 0;
		uint64_t average_cpi_whole = 0;
		uint64_t average_cpi_fractional = 0;

#if MONOTONIC
		if (sched_hygiene_debug_pmc) {
			uint64_t current_cycles = 0, current_instrs = 0;

			/*
			 * We're getting these values a bit late, but getting them
			 * is a bit expensive, so we take the slight hit in
			 * accuracy for the reported values (which aren't very
			 * stable anyway).
			 */
			istate = ml_set_interrupts_enabled_with_debug(false, false);
			mt_cur_cpu_cycles_instrs_speculative(&current_cycles, &current_instrs);
			ml_set_interrupts_enabled_with_debug(istate, false);

			uint64_t duration_ns;
			absolutetime_to_nanoseconds(gross_duration, &duration_ns);

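			/*
			 * Note on units: the cycle delta divided by (duration_ns / 1000)
			 * is cycles per microsecond, i.e. the average frequency in MHz,
			 * and CPI is reported as a whole part plus a two-digit
			 * fractional part (hence the "* 100 ... % 100" below).
			 */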
			average_freq = (current_cycles - thread->machine.preemption_disable_cycles) / (duration_ns / 1000);
			average_cpi_whole = (current_cycles - thread->machine.preemption_disable_cycles) / (current_instrs - thread->machine.preemption_disable_instr);
			average_cpi_fractional =
			    (((current_cycles - thread->machine.preemption_disable_cycles) * 100) / (current_instrs - thread->machine.preemption_disable_instr)) % 100;
		}
#endif

		if (sched_preemption_disable_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
			panic("preemption disable timeout exceeded: %llu >= %llu mt ticks (start: %llu, now: %llu, gross: %llu, inttime: %llu), "
			    "freq = %llu MHz, CPI = %llu.%llu",
			    net_duration, threshold, mt, now, gross_duration, adjust,
			    average_freq, average_cpi_whole, average_cpi_fractional);
		}

		DTRACE_SCHED4(mach_preemption_expired, uint64_t, net_duration, uint64_t, gross_duration,
		    uint64_t, average_cpi_whole, uint64_t, average_cpi_fractional);
		if (__improbable(kdebug_debugid_enabled(sched_preemption_disable_debug_dbgid))) {
			KDBG(sched_preemption_disable_debug_dbgid, net_duration, gross_duration, average_cpi_whole, average_cpi_fractional);
		}
	}

out:
	/*
	 * The preemption count is SCHED_HYGIENE_MARKER; we need to clear it.
	 */
	_enable_preemption_write_count(thread, 0);
}

/*
 * Abandon a potential preemption disable measurement. Useful, for
 * example, for the idle thread, which would just spuriously
 * trigger the threshold while actually idling, which we don't
 * care about.
 */
void
abandon_preemption_disable_measurement(void)
{
	thread_t t = current_thread();
	bool istate = ml_set_interrupts_enabled_with_debug(false, false); // don't take int masked timestamp

	if (t->machine.preemption_disable_mt != 0) {
		t->machine.preemption_disable_abandon = true;
	}
	ml_set_interrupts_enabled_with_debug(istate, false);
}

/*
 * Skip predicate for sched_preemption_disable, which would trigger
 * spuriously when kprintf spam is enabled.
 */
bool
kprintf_spam_mt_pred(struct machine_timeout_spec const __unused *spec)
{
	bool const kprintf_spam_enabled = !(disable_kprintf_output || disable_serial_output);
	return kprintf_spam_enabled;
}

/*
 * Abandon function exported for AppleCLPC, as a workaround to rdar://91668370.
 *
 * Only for AppleCLPC!
 */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	abandon_preemption_disable_measurement();
}

#else /* SCHED_HYGIENE_DEBUG */
void
sched_perfcontrol_abandon_preemption_disable_measurement(void)
{
	// No-op. Function is exported, so needs to be defined
}
#endif /* SCHED_HYGIENE_DEBUG */

/*
 * This function is written so that its codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers already use current_thread().
 *
 * TODO: It is unfortunate that we have to load
 *       sched_preemption_disable_debug_mode
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_disable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);

#if SCHED_HYGIENE_DEBUG
	/*
	 * Note that this is not the only place preemption gets disabled;
	 * it also gets modified on ISR and PPL entry/exit. Both of those
	 * events are treated specially, however, and
	 * increment/decrement being paired around their entry/exit means
	 * that collection here is not desynced otherwise.
	 */

	if (__improbable(count == 0 && sched_preemption_disable_debug_mode)) {
		__attribute__((musttail))
		return _prepare_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */
}


/*
 * This variant of disable_preemption() allows disabling preemption
 * without taking measurements (and later potentially triggering
 * actions on those).
 */
__attribute__((always_inline))
void
_disable_preemption_without_measurements(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * Inform _collect_preemption_disable_measurement()
	 * that we didn't really care.
	 */
	thread->machine.preemption_disable_abandon = true;
#endif

	os_atomic_store(&thread->machine.preemption_count,
	    count + 1, compiler_acq_rel);
}

/*
 * To help _enable_preemption() inline everywhere with LTO,
 * we keep these non-inlineable functions separate, as the panic()
 * codegen setup is quite large and for weird reasons causes a frame.
 */
__abortlike
static void
_enable_preemption_underflow(void)
{
	panic("Preemption count underflow");
}

/*
 * This function is written so that its codegen is extremely short.
 *
 * LTO isn't smart enough to inline it, yet it is profitable because
 * the vast majority of callers already use current_thread().
 *
 * The SCHED_HYGIENE_MARKER trick is used so that we do not have to load
 * unrelated fields of current_thread().
 *
 * /!\ Breaking inlining causes zalloc to be roughly 10% slower /!\
 */
__attribute__((always_inline))
void
_enable_preemption(void)
{
	thread_t thread = current_thread();
	unsigned int count = thread->machine.preemption_count;

	if (__improbable(count == 0)) {
		_enable_preemption_underflow();
	}

#if SCHED_HYGIENE_DEBUG
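	/*
	 * _prepare_preemption_disable_measurement() ORs SCHED_HYGIENE_MARKER
	 * into preemption_count, so the outermost enable of a measured
	 * disable window observes exactly SCHED_HYGIENE_MARKER + 1 here and
	 * routes to collection (which also clears the marker).
	 */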
	if (__improbable(count == SCHED_HYGIENE_MARKER + 1)) {
		return _collect_preemption_disable_measurement();
	}
#endif /* SCHED_HYGIENE_DEBUG */

	_enable_preemption_write_count(thread, count - 1);
}

__attribute__((always_inline))
unsigned int
get_preemption_level_for_thread(thread_t thread)
{
	unsigned int count = thread->machine.preemption_count;

#if SCHED_HYGIENE_DEBUG
	/*
	 * Hide this "flag" from callers; it would make the count look
	 * negative anyway, which some people dislike.
	 */
	count &= ~SCHED_HYGIENE_MARKER;
#endif
	return (int)count;
}

__attribute__((always_inline))
int
get_preemption_level(void)
{
	return get_preemption_level_for_thread(current_thread());
}

#if CONFIG_PV_TICKET
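
/*
 * Sketch of intent, inferred from the code below: when both the
 * HVG_HCALL_VCPU_WFK and HVG_HCALL_VCPU_KICK hypercalls are available,
 * has_lock_pv is turned on so waiters can be parked and kicked by the
 * hypervisor instead of spinning; booting with pvticket=0 skips the probe
 * and leaves the feature off.
 */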
__startup_func
void
lck_init_pv(void)
{
	uint32_t pvtck = 1;
	PE_parse_boot_argn("pvticket", &pvtck, sizeof(pvtck));
	if (pvtck == 0) {
		return;
	}
	has_lock_pv = hvg_is_hcall_available(HVG_HCALL_VCPU_WFK) &&
	    hvg_is_hcall_available(HVG_HCALL_VCPU_KICK);
}
STARTUP(LOCKS, STARTUP_RANK_FIRST, lck_init_pv);
#endif


#pragma mark lck_spin_t
#if LCK_SPIN_IS_TICKET_LOCK

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck_ticket_init(lck, grp);
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck_ticket_init((lck_ticket_t *)lck, LCK_GRP_NULL);
}

void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	if (type == LCK_ASSERT_OWNED) {
		lck_ticket_assert_owned(lock);
	} else if (type == LCK_ASSERT_NOTOWNED) {
		lck_ticket_assert_not_owned(lock);
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_ticket_lock(lock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_ticket_lock_nopreempt(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	return lck_ticket_lock_try(lock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	return lck_ticket_lock_try_nopreempt(lock, LCK_GRP_NULL);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_ticket_unlock(lock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_ticket_destroy(lck, grp);
}

/*
 * These really should be in an alias file instead,
 * but aliases can't be made conditional.
 *
 * It will be good enough for perf evals for now.
 *
 * We also can't make aliases for symbols that
 * are in alias files, like lck_spin_init and friends,
 * so this suffers double-jump penalties for kexts
 * (LTO does the right thing for XNU).
 */
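/*
 * make_alias(a, b) emits ".globl _a" and ".set _a, _b" assembler directives,
 * so the exported lck_spin_* symbols below bind directly to their
 * lck_ticket_* implementations.
 */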
#define make_alias(a, b) asm(".globl _" #a "\n" ".set _" #a ", _" #b "\n")
make_alias(lck_spin_lock_grp, lck_ticket_lock);
make_alias(lck_spin_lock_nopreempt_grp, lck_ticket_lock_nopreempt);
make_alias(lck_spin_try_lock_grp, lck_ticket_lock_try);
make_alias(lck_spin_try_lock_nopreempt_grp, lck_ticket_lock_try_nopreempt);
make_alias(lck_spin_unlock_nopreempt, lck_ticket_unlock_nopreempt);
make_alias(kdp_lck_spin_is_acquired, kdp_lck_ticket_is_acquired);
#undef make_alias

#else /* !LCK_SPIN_IS_TICKET_LOCK */

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_spin_invalid_panic(lck_spin_t *lck)
{
	const char *how = "Invalid";

	if (lck->type == LCK_SPIN_TYPE_DESTROYED ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		how = "Destroyed";
	}

	panic("%s spinlock %p: <0x%016lx 0x%16lx>",
	    how, lck, lck->lck_spin_data, lck->type);
}

static inline void
lck_spin_verify(lck_spin_t *lck)
{
	if (lck->type != LCK_SPIN_TYPE ||
	    lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) {
		__lck_spin_invalid_panic(lck);
	}
}
#else /* DEVELOPMENT || DEBUG */
#define lck_spin_verify(lck) ((void)0)
#endif /* DEVELOPMENT || DEBUG */

lck_spin_t *
lck_spin_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_spin_t *lck;

	lck = zalloc(KT_LCK_SPIN);
	lck_spin_init(lck, grp, attr);
	return lck;
}

void
lck_spin_free(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_destroy(lck, grp);
	zfree(KT_LCK_SPIN, lck);
}

void
lck_spin_init(lck_spin_t *lck, lck_grp_t *grp, __unused lck_attr_t *attr)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	if (grp) {
		lck_grp_reference(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
}

void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t thread, holder;

	if (lock->type != LCK_SPIN_TYPE) {
		panic("Invalid spinlock %p", lock);
	}

	holder = HW_LOCK_STATE_TO_THREAD(lock->lck_spin_data);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("lck_spin_assert(): invalid arg (%u)", type);
	}
}

void
lck_spin_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock(&lock->hwlock, grp);
}

void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

void
lck_spin_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	hw_lock_lock_nopreempt(&lock->hwlock, grp);
}

int
lck_spin_try_lock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try(&lock->hwlock, grp);
}

int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, LCK_GRP_NULL);
}

int
lck_spin_try_lock_nopreempt_grp(lck_spin_t *lock, lck_grp_t *grp)
{
#pragma unused(grp)
	lck_spin_verify(lock);
	return hw_lock_try_nopreempt(&lock->hwlock, grp);
}

void
lck_spin_unlock(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock(&lock->hwlock);
}

void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
	lck_spin_verify(lock);
	hw_lock_unlock_nopreempt(&lock->hwlock);
}

void
lck_spin_destroy(lck_spin_t *lck, lck_grp_t *grp)
{
	lck_spin_verify(lck);
	*lck = (lck_spin_t){
		.lck_spin_data = LCK_SPIN_TAG_DESTROYED,
		.type = LCK_SPIN_TYPE_DESTROYED,
	};
	if (grp) {
		lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
	}
}

/*
 * Routine:	kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck)
{
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE : FALSE;
}

#endif /* !LCK_SPIN_IS_TICKET_LOCK */

/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(
	usimple_lock_t l,
	unsigned short tag)
{
	simple_lock_init((simple_lock_t) l, tag);
}


/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_lock)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	simple_lock((simple_lock_t) l, LCK_GRP_PROBEARG(grp));
}


/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
(usimple_unlock)(
	usimple_lock_t l)
{
	simple_unlock((simple_lock_t)l);
}


/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked. Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses? XXX
 */
unsigned
int
(usimple_lock_try)(
	usimple_lock_t l
	LCK_GRP_ARG(lck_grp_t *grp))
{
	return simple_lock_try((simple_lock_t) l, grp);
}
