1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/lock_stat.h>
65 #include <kern/locks.h>
66 #include <kern/misc_protos.h>
67 #include <kern/zalloc.h>
68 #include <kern/thread.h>
69 #include <kern/processor.h>
70 #include <kern/sched_prim.h>
71 #include <kern/debug.h>
72 #include <libkern/section_keywords.h>
73 #include <machine/atomic.h>
74 #include <machine/machine_cpu.h>
75 #include <string.h>
76 #include <vm/pmap.h>
77
78 #include <sys/kdebug.h>
79
80 #define LCK_MTX_SLEEP_CODE 0
81 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
82 #define LCK_MTX_LCK_WAIT_CODE 2
83 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
84
85 #if MACH_LDEBUG
86 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
87 #else
88 #define ALIGN_TEST(p, t) do{}while(0)
89 #endif
90
91 #define NOINLINE __attribute__((noinline))
92
93 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
94 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
95
96 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
97
98 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
99 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
100
101 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
102
103 #if DEBUG
104 TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
105 #else
106 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
107 #endif
108
109 #if CONFIG_DTRACE
110 #if defined (__x86_64__)
111 uint32_t _Atomic dtrace_spin_threshold = 500; // 500ns
112 #define lock_enable_preemption enable_preemption
113 #elif defined(__arm__) || defined(__arm64__)
114 MACHINE_TIMEOUT32(dtrace_spin_threshold, "dtrace-spin-threshold",
115 0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
116 #endif
117 #endif
118
119 uintptr_t
120 unslide_for_kdebug(void* object)
121 {
122 if (__improbable(kdebug_enable)) {
123 return VM_KERNEL_UNSLIDE_OR_PERM(object);
124 } else {
125 return 0;
126 }
127 }
128
129 static __abortlike void
130 __lck_require_preemption_disabled_panic(void *lock)
131 {
132 panic("Attempt to take no-preempt lock %p in preemptible context", lock);
133 }
134
135 static inline void
136 __lck_require_preemption_disabled(void *lock, thread_t self __unused)
137 {
138 if (__improbable(!lock_preemption_disabled_for_thread(self))) {
139 __lck_require_preemption_disabled_panic(lock);
140 }
141 }
142
143 /*
144 * Routine: hw_lock_init
145 *
146 * Initialize a hardware lock.
147 */
148 MARK_AS_HIBERNATE_TEXT void
149 hw_lock_init(hw_lock_t lock)
150 {
151 ordered_store_hw(lock, 0);
152 }
153
154 __result_use_check
155 static inline bool
156 hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
157 {
158 #if OS_ATOMIC_USE_LLSC
159 uintptr_t oldval;
160 os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
161 if (oldval != 0) {
162 wait_for_event(); // clears the monitor so we don't need give_up()
163 return false;
164 }
165 });
166 return true;
167 #else // !OS_ATOMIC_USE_LLSC
168 #if OS_ATOMIC_HAS_LLSC
169 uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
170 if (oldval != 0) {
171 wait_for_event(); // clears the monitor so we don't need give_up()
172 return false;
173 }
174 #elif LOCK_PRETEST
175 if (ordered_load_hw(lock) != 0) {
176 return false;
177 }
178 #endif
179 return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
180 #endif // !OS_ATOMIC_USE_LLSC
181 }
182
183 /*
184 * Input and output timeouts are expressed in mach_absolute_time ticks on ARM and in TSC ticks on Intel.
185 */
186 __attribute__((always_inline))
187 uint64_t
188 #if INTERRUPT_MASKED_DEBUG
189 hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout, __unused bool in_ppl, __unused bool interruptible)
190 #else
191 hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout)
192 #endif /* INTERRUPT_MASKED_DEBUG */
193 {
194 uint64_t timeout = in_timeout;
195 if (timeout == 0) {
196 timeout = default_timeout;
197 #if INTERRUPT_MASKED_DEBUG
198 #ifndef KASAN
199 if (timeout > 0 && !in_ppl) {
200 if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC && !interruptible) {
201 uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
202 if (int_timeout < timeout) {
203 timeout = int_timeout;
204 }
205 }
206 }
207 #endif /* !KASAN */
208 #endif /* INTERRUPT_MASKED_DEBUG */
209 }
210
211 return timeout;
212 }
213
214 __attribute__((always_inline))
215 void
216 lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
217 {
218 #if DEBUG || DEVELOPMENT
219 PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
220 #else
221 (void)owner;
222 #endif
223 }
224
225 lck_spinlock_to_info_t
226 lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
227 {
228 lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);
229
230 /* strip possible bits used by the lock implementations */
231 owner &= ~0x7ul;
232
233 lsti->lock = lck;
234 lsti->owner_thread_cur = owner;
235 lsti->owner_cpu = ~0u;
236 os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
237
238 if (owner == 0) {
239 /* if the owner isn't known, just bail */
240 goto out;
241 }
242
243 for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
244 cpu_data_t *data = cpu_datap(i);
245 if (data && (uintptr_t)data->cpu_active_thread == owner) {
246 lsti->owner_cpu = i;
247 os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
248 #if __x86_64__
249 if ((uint32_t)cpu_number() != i) {
250 /* Cause NMI and panic on the owner's cpu */
251 NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
252 }
253 #endif
254 break;
255 }
256 }
257
258 out:
259 return lsti;
260 }
261
262 /*
263 * Routine: hw_lock_trylock_mask_allow_invalid
264 *
265 * Tries to acquire a lock that may live in unmapped memory.
266 * It assumes a valid lock MUST have another bit set (different from
267 * the one being set to lock).
268 */
269 __result_use_check
270 extern hw_lock_status_t
271 hw_lock_trylock_mask_allow_invalid(uint32_t *lock, uint32_t mask);
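
/*
 * Illustrative sketch (not part of the build; the object and bit names are
 * hypothetical): a caller of hw_lock_trylock_mask_allow_invalid() has to be
 * prepared for all three hw_lock_status_t outcomes, in the same way
 * hw_lock_bit_to_allow_invalid() below is.
 *
 *	hw_lock_status_t st;
 *
 *	st = hw_lock_trylock_mask_allow_invalid(&obj->flags, 1u << OBJ_LOCK_BIT);
 *	switch (st) {
 *	case HW_LOCK_ACQUIRED:
 *		// use obj, then release with hw_unlock_bit(&obj->flags, OBJ_LOCK_BIT)
 *		break;
 *	case HW_LOCK_CONTENDED:
 *		// valid lock, currently held: retry or spin with a timeout handler
 *		break;
 *	case HW_LOCK_INVALID:
 *		// the memory no longer holds a valid lock: give up
 *		break;
 *	}
 */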
272
273 __result_use_check
274 static inline bool
275 hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
276 {
277 uint32_t mask = 1u << bit;
278
279 #if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
280 uint32_t oldval, newval;
281 os_atomic_rmw_loop(target, oldval, newval, acquire, {
282 newval = oldval | mask;
283 if (__improbable(oldval & mask)) {
284 #if OS_ATOMIC_HAS_LLSC
285 if (wait) {
286 wait_for_event(); // clears the monitor so we don't need give_up()
287 } else {
288 os_atomic_clear_exclusive();
289 }
290 #else
291 if (wait) {
292 cpu_pause();
293 }
294 #endif
295 return false;
296 }
297 });
298 return true;
299 #else
300 uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
301 if (__improbable(oldval & mask)) {
302 if (wait) {
303 wait_for_event(); // clears the monitor so we don't need give_up()
304 } else {
305 os_atomic_clear_exclusive();
306 }
307 return false;
308 }
309 return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
310 #endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
311 }
312
313 static hw_lock_timeout_status_t
314 hw_lock_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
315 {
316 #pragma unused(interrupt_time)
317
318 hw_lock_t lock = _lock;
319 uintptr_t owner = lock->lock_data & ~0x7ul;
320 lck_spinlock_to_info_t lsti;
321
322 if (!spinlock_timeout_panic) {
323 /* keep spinning rather than panicking */
324 return HW_LOCK_TIMEOUT_CONTINUE;
325 }
326
327 if (pmap_in_ppl()) {
328 /*
329 * This code is used by the PPL and can't write to globals.
330 */
331 panic("Spinlock[%p] timeout after %llu ticks; "
332 "current owner: %p, "
333 "start time: %llu, now: %llu, timeout: %llu",
334 lock, now - start, (void *)owner,
335 start, now, timeout);
336 }
337
338 // Capture the actual time spent blocked, which may be higher than the timeout
339 // if a misbehaving interrupt stole this thread's CPU time.
340 lsti = lck_spinlock_timeout_hit(lock, owner);
341 panic("Spinlock[%p] timeout after %llu ticks; "
342 "current owner: %p (on cpu %d), "
343 #if DEBUG || DEVELOPMENT
344 "initial owner: %p, "
345 #endif /* DEBUG || DEVELOPMENT */
346 #if INTERRUPT_MASKED_DEBUG
347 "interrupt time: %llu, "
348 #endif /* INTERRUPT_MASKED_DEBUG */
349 "start time: %llu, now: %llu, timeout: %llu",
350 lock, now - start,
351 (void *)lsti->owner_thread_cur, lsti->owner_cpu,
352 #if DEBUG || DEVELOPMENT
353 (void *)lsti->owner_thread_orig,
354 #endif /* DEBUG || DEVELOPMENT */
355 #if INTERRUPT_MASKED_DEBUG
356 interrupt_time,
357 #endif /* INTERRUPT_MASKED_DEBUG */
358 start, now, timeout);
359 }
360
361 static hw_lock_timeout_status_t
362 hw_lock_bit_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
363 {
364 #pragma unused(interrupt_time)
365
366 hw_lock_t lock = _lock;
367 uintptr_t state = lock->lock_data;
368
369 if (!spinlock_timeout_panic) {
370 /* keep spinning rather than panicking */
371 return HW_LOCK_TIMEOUT_CONTINUE;
372 }
373
374 panic("Spinlock[%p] timeout after %llu ticks; "
375 "current state: %p, "
376 #if INTERRUPT_MASKED_DEBUG
377 "interrupt time: %llu, "
378 #endif /* INTERRUPT_MASKED_DEBUG */
379 "start time: %llu, now: %llu, timeout: %llu",
380 lock, now - start, (void*) state,
381 #if INTERRUPT_MASKED_DEBUG
382 interrupt_time,
383 #endif /* INTERRUPT_MASKED_DEBUG */
384 start, now, timeout);
385 }
386
387 /*
388 * Routine: hw_lock_lock_contended
389 *
390 * Spin until lock is acquired or timeout expires.
391 * timeout is in mach_absolute_time ticks. Called with
392 * preemption disabled.
393 */
394 static hw_lock_status_t NOINLINE
395 hw_lock_lock_contended(hw_lock_t lock, thread_t thread, uintptr_t data, uint64_t timeout,
396 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
397 {
398 #pragma unused(thread)
399
400 uint64_t end = 0, start = 0, interrupts = 0;
401 uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
402 bool has_timeout = true, in_ppl = pmap_in_ppl();
403 #if INTERRUPT_MASKED_DEBUG
404 /* Note we can't check if we are interruptible if in ppl */
405 bool interruptible = !in_ppl && ml_get_interrupts_enabled();
406 uint64_t start_interrupts = 0;
407 #endif /* INTERRUPT_MASKED_DEBUG */
408
409 #if CONFIG_DTRACE || LOCK_STATS
410 uint64_t begin = 0;
411 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
412
413 if (__improbable(stat_enabled)) {
414 begin = mach_absolute_time();
415 }
416 #endif /* CONFIG_DTRACE || LOCK_STATS */
417
418 if (!in_ppl) {
419 /*
420 * This code is used by the PPL and can't write to globals.
421 */
422 lck_spinlock_timeout_set_orig_owner(lock->lock_data);
423 }
424
425 #if INTERRUPT_MASKED_DEBUG
426 timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
427 #else
428 timeout = hw_lock_compute_timeout(timeout, default_timeout);
429 #endif /* INTERRUPT_MASKED_DEBUG */
430 if (timeout == 0) {
431 has_timeout = false;
432 }
433
434 for (;;) {
435 for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
436 cpu_pause();
437 if (hw_lock_trylock_contended(lock, data)) {
438 #if CONFIG_DTRACE || LOCK_STATS
439 if (__improbable(stat_enabled)) {
440 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
441 mach_absolute_time() - begin);
442 }
443 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
444 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
445 #endif /* CONFIG_DTRACE || LOCK_STATS */
446 return HW_LOCK_ACQUIRED;
447 }
448 }
449 if (has_timeout) {
450 uint64_t now = ml_get_timebase();
451 if (end == 0) {
452 #if INTERRUPT_MASKED_DEBUG
453 if (interruptible) {
454 start_interrupts = thread->machine.int_time_mt;
455 }
456 #endif /* INTERRUPT_MASKED_DEBUG */
457 start = now;
458 end = now + timeout;
459 } else if (now < end) {
460 /* keep spinning */
461 } else {
462 #if INTERRUPT_MASKED_DEBUG
463 if (interruptible) {
464 interrupts = thread->machine.int_time_mt - start_interrupts;
465 }
466 #endif /* INTERRUPT_MASKED_DEBUG */
467 if (handler(lock, timeout, start, now, interrupts)) {
468 /* push the deadline */
469 end += timeout;
470 } else {
471 #if CONFIG_DTRACE || LOCK_STATS
472 if (__improbable(stat_enabled)) {
473 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
474 mach_absolute_time() - begin);
475 }
476 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
477 #endif /* CONFIG_DTRACE || LOCK_STATS */
478 return HW_LOCK_CONTENDED;
479 }
480 }
481 }
482 }
483 }
484
485 uint32_t
486 hw_wait_while_equals32(uint32_t *address, uint32_t current)
487 {
488 uint32_t v;
489 uint64_t end = 0, timeout = 0;
490 uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
491 bool has_timeout = true;
492 #if INTERRUPT_MASKED_DEBUG
493 thread_t thread = current_thread();
494 bool in_ppl = pmap_in_ppl();
495 /* Note we can't check if we are interruptible if in ppl */
496 bool interruptible = !in_ppl && ml_get_interrupts_enabled();
497 uint64_t interrupts = 0, start_interrupts = 0;
498
499 timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
500 #else
501 timeout = hw_lock_compute_timeout(0, default_timeout);
502 #endif /* INTERRUPT_MASKED_DEBUG */
503 if (timeout == 0) {
504 has_timeout = false;
505 }
506
507 for (;;) {
508 for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
509 cpu_pause();
510 #if OS_ATOMIC_HAS_LLSC
511 v = os_atomic_load_exclusive(address, relaxed);
512 if (__probable(v != current)) {
513 os_atomic_clear_exclusive();
514 return v;
515 }
516 wait_for_event();
517 #else
518 v = os_atomic_load(address, relaxed);
519 if (__probable(v != current)) {
520 return v;
521 }
522 #endif // OS_ATOMIC_HAS_LLSC
523 }
524 if (has_timeout) {
525 if (end == 0) {
526 end = ml_get_timebase() + timeout;
527 #if INTERRUPT_MASKED_DEBUG
528 if (interruptible) {
529 start_interrupts = thread->machine.int_time_mt;
530 }
531 #endif /* INTERRUPT_MASKED_DEBUG */
532 } else if (ml_get_timebase() >= end) {
533 #if INTERRUPT_MASKED_DEBUG
534 if (interruptible) {
535 interrupts = thread->machine.int_time_mt - start_interrupts;
536 panic("Wait while equals timeout @ *%p == 0x%x, "
537 "interrupt_time %llu", address, v, interrupts);
538 }
539 #endif /* INTERRUPT_MASKED_DEBUG */
540 panic("Wait while equals timeout @ *%p == 0x%x",
541 address, v);
542 }
543 }
544 }
545 }
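
/*
 * Illustrative sketch (hypothetical variable and value names):
 * hw_wait_while_equals32() spins, with the global lock panic timeout
 * applied, until *address moves away from the value the caller last
 * observed, and returns the new value.
 *
 *	uint32_t state = os_atomic_load(&obj->handshake, relaxed);
 *
 *	while (state == HANDSHAKE_IN_PROGRESS) {
 *		state = hw_wait_while_equals32(&obj->handshake, HANDSHAKE_IN_PROGRESS);
 *	}
 */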
546
547 uint64_t
548 hw_wait_while_equals64(uint64_t *address, uint64_t current)
549 {
550 uint64_t v;
551 uint64_t end = 0, timeout = 0;
552 uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
553 bool has_timeout = true;
554
555 #if INTERRUPT_MASKED_DEBUG
556 thread_t thread = current_thread();
557 bool in_ppl = pmap_in_ppl();
558 /* Note we can't check if we are interruptible if in ppl */
559 bool interruptible = !in_ppl && ml_get_interrupts_enabled();
560 uint64_t interrupts = 0, start_interrupts = 0;
561
562 timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
563 #else
564 timeout = hw_lock_compute_timeout(0, default_timeout);
565 #endif /* INTERRUPT_MASKED_DEBUG */
566 if (timeout == 0) {
567 has_timeout = false;
568 }
569
570 for (;;) {
571 for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
572 cpu_pause();
573 #if OS_ATOMIC_HAS_LLSC
574 v = os_atomic_load_exclusive(address, relaxed);
575 if (__probable(v != current)) {
576 os_atomic_clear_exclusive();
577 return v;
578 }
579 wait_for_event();
580 #else
581 v = os_atomic_load(address, relaxed);
582 if (__probable(v != current)) {
583 return v;
584 }
585 #endif // OS_ATOMIC_HAS_LLSC
586 }
587 if (has_timeout) {
588 if (end == 0) {
589 end = ml_get_timebase() + timeout;
590 #if INTERRUPT_MASKED_DEBUG
591 if (interruptible) {
592 start_interrupts = thread->machine.int_time_mt;
593 }
594 #endif /* INTERRUPT_MASKED_DEBUG */
595 } else if (ml_get_timebase() >= end) {
596 #if INTERRUPT_MASKED_DEBUG
597 if (interruptible) {
598 interrupts = thread->machine.int_time_mt - start_interrupts;
599 panic("Wait while equals timeout @ *%p == 0x%llx, "
600 "interrupt_time %llu", address, v, interrupts);
601 }
602 #endif /* INTERRUPT_MASKED_DEBUG */
603 panic("Wait while equals timeout @ *%p == 0x%llx",
604 address, v);
605 }
606 }
607 }
608 }
609
610 __result_use_check
611 static inline hw_lock_status_t
612 hw_lock_to_internal(hw_lock_t lock, thread_t thread, uint64_t timeout,
613 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
614 {
615 uintptr_t state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
616
617 if (__probable(hw_lock_trylock_contended(lock, state))) {
618 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
619 return HW_LOCK_ACQUIRED;
620 }
621
622 return hw_lock_lock_contended(lock, thread, state, timeout, handler LCK_GRP_ARG(grp));
623 }
624
625 /*
626 * Routine: hw_lock_lock
627 *
628 * Acquire lock, spinning until it becomes available,
629 * return with preemption disabled.
630 */
631 void
632 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
633 {
634 thread_t thread = current_thread();
635 lock_disable_preemption_for_thread(thread);
636 (void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
637 }
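
/*
 * Illustrative sketch of the basic usage (hypothetical names; the lck_grp_t
 * argument is assumed to be supplied by the caller-side wrappers when lock
 * statistics are configured in):
 *
 *	hw_lock_data_t lock;
 *
 *	hw_lock_init(&lock);
 *	...
 *	hw_lock_lock(&lock, &my_grp);	// returns with preemption disabled
 *	// short, non-blocking critical section
 *	hw_lock_unlock(&lock);		// re-enables preemption
 */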
638
639 /*
640 * Routine: hw_lock_lock_nopreempt
641 *
642 * Acquire lock, spinning until it becomes available.
643 */
644 void
645 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
646 {
647 thread_t thread = current_thread();
648 __lck_require_preemption_disabled(lock, thread);
649 (void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
650 }
651
652 /*
653 * Routine: hw_lock_to
654 *
655 * Acquire lock, spinning until it becomes available or timeout.
656 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
657 * preemption disabled.
658 */
659 unsigned
660 int
661 (hw_lock_to)(hw_lock_t lock, uint64_t timeout, hw_lock_timeout_handler_t handler
662 LCK_GRP_ARG(lck_grp_t *grp))
663 {
664 thread_t thread = current_thread();
665 lock_disable_preemption_for_thread(thread);
666 return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
667 }
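
/*
 * Illustrative sketch of a custom timeout handler (hypothetical caller).
 * When the deadline passes, the handler may return HW_LOCK_TIMEOUT_CONTINUE
 * to push the deadline out by another timeout interval (as
 * hw_lock_timeout_panic() does when spinlock_timeout_panic is cleared), or
 * the non-continue status to make hw_lock_to() give up and return
 * HW_LOCK_CONTENDED. Note that preemption stays disabled even when the
 * acquisition times out.
 *
 *	static hw_lock_timeout_status_t
 *	my_timeout_handler(void *lock, uint64_t timeout, uint64_t start,
 *	    uint64_t now, uint64_t interrupt_time)
 *	{
 *		// e.g. log once, then keep spinning
 *		return HW_LOCK_TIMEOUT_CONTINUE;
 *	}
 *
 *	if (hw_lock_to(&lock, my_timeout, my_timeout_handler, &my_grp) != HW_LOCK_ACQUIRED) {
 *		// timed out: the lock is not held, preemption is still disabled
 *	}
 */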
668
669 /*
670 * Routine: hw_lock_to_nopreempt
671 *
672 * Acquire lock, spinning until it becomes available or timeout.
673 * Timeout is in mach_absolute_time ticks, called and return with
674 * preemption disabled.
675 */
676 unsigned
677 int
678 (hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout,
679 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
680 {
681 thread_t thread = current_thread();
682 __lck_require_preemption_disabled(lock, thread);
683 return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
684 }
685
686 __result_use_check
687 static inline unsigned int
688 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
689 {
690 int success = 0;
691
692 #if LOCK_PRETEST
693 if (__improbable(ordered_load_hw(lock) != 0)) {
694 return 0;
695 }
696 #endif // LOCK_PRETEST
697
698 success = os_atomic_cmpxchg(&lock->lock_data, 0,
699 LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);
700
701 if (success) {
702 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
703 }
704 return success;
705 }
706
707 /*
708 * Routine: hw_lock_try
709 *
710 * returns with preemption disabled on success.
711 */
712 unsigned
713 int
714 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
715 {
716 thread_t thread = current_thread();
717 lock_disable_preemption_for_thread(thread);
718 unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
719 if (!success) {
720 lock_enable_preemption();
721 }
722 return success;
723 }
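
/*
 * Illustrative try-lock sketch (hypothetical names): on failure hw_lock_try()
 * has already restored the preemption level, so the caller can simply fall
 * back to another strategy.
 *
 *	if (hw_lock_try(&lock, &my_grp)) {
 *		// critical section, preemption disabled
 *		hw_lock_unlock(&lock);
 *	} else {
 *		// contended: defer the work, or spin with hw_lock_to()
 *	}
 */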
724
725 unsigned
726 int
727 (hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
728 {
729 thread_t thread = current_thread();
730 __lck_require_preemption_disabled(lock, thread);
731 return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
732 }
733
734 /*
735 * Routine: hw_lock_unlock
736 *
737 * Unconditionally release lock, release preemption level.
738 */
739 static inline void
740 hw_lock_unlock_internal(hw_lock_t lock)
741 {
742 os_atomic_store(&lock->lock_data, 0, release);
743 #if __arm__ || __arm64__
744 // ARM tests are only for open-source exclusion
745 set_event();
746 #endif // __arm__ || __arm64__
747 #if CONFIG_DTRACE
748 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
749 #endif /* CONFIG_DTRACE */
750 }
751
752 void
753 (hw_lock_unlock)(hw_lock_t lock)
754 {
755 hw_lock_unlock_internal(lock);
756 lock_enable_preemption();
757 }
758
759 void
760 (hw_lock_unlock_nopreempt)(hw_lock_t lock)
761 {
762 hw_lock_unlock_internal(lock);
763 }
764
765 /*
766 * Routine hw_lock_held, doesn't change preemption state.
767 * N.B. Racy, of course.
768 */
769 unsigned int
770 hw_lock_held(hw_lock_t lock)
771 {
772 return ordered_load_hw(lock) != 0;
773 }
774
775 static hw_lock_status_t NOINLINE
776 hw_lock_bit_to_contended(
777 hw_lock_bit_t *lock,
778 uint32_t bit,
779 uint64_t timeout,
780 hw_lock_timeout_handler_t handler,
781 bool validate
782 LCK_GRP_ARG(lck_grp_t *grp))
783 {
784 uint64_t end = 0, start = 0, interrupts = 0;
785 uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
786 bool has_timeout = true;
787 hw_lock_status_t rc;
788 uint32_t mask = 1u << bit;
789 #if INTERRUPT_MASKED_DEBUG
790 thread_t thread = current_thread();
791 bool in_ppl = pmap_in_ppl();
792 /* Note we can't check if we are interruptible if in ppl */
793 bool interruptible = !in_ppl && ml_get_interrupts_enabled();
794 uint64_t start_interrupts = 0;
795 #endif /* INTERRUPT_MASKED_DEBUG */
796
797 #if CONFIG_DTRACE || LOCK_STATS
798 uint64_t begin = 0;
799 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
800
801 if (__improbable(stat_enabled)) {
802 begin = mach_absolute_time();
803 }
804 #endif /* LOCK_STATS || CONFIG_DTRACE */
805
806 #if INTERRUPT_MASKED_DEBUG
807 timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
808 #else
809 timeout = hw_lock_compute_timeout(timeout, default_timeout);
810 #endif /* INTERRUPT_MASKED_DEBUG */
811 if (timeout == 0) {
812 has_timeout = false;
813 }
814
815 for (;;) {
816 for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
817 // Always load-exclusive before wfe
818 // This grabs the monitor and wakes up on a release event
819 if (validate) {
820 rc = hw_lock_trylock_mask_allow_invalid(lock, mask);
821 if (rc == HW_LOCK_INVALID) {
822 lock_enable_preemption();
823 return rc;
824 }
825 } else {
826 rc = hw_lock_trylock_bit(lock, bit, true);
827 }
828 if (rc == HW_LOCK_ACQUIRED) {
829 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
830 goto end;
831 }
832 }
833 if (has_timeout) {
834 uint64_t now = ml_get_timebase();
835 if (end == 0) {
836 #if INTERRUPT_MASKED_DEBUG
837 if (interruptible) {
838 start_interrupts = thread->machine.int_time_mt;
839 }
840 #endif /* INTERRUPT_MASKED_DEBUG */
841 start = now;
842 end = now + timeout;
843 } else if (now < end) {
844 /* keep spinning */
845 } else {
846 #if INTERRUPT_MASKED_DEBUG
847 if (interruptible) {
848 interrupts = thread->machine.int_time_mt - start_interrupts;
849 }
850 #endif /* INTERRUPT_MASKED_DEBUG */
851 if (handler(lock, timeout, start, now, interrupts)) {
852 /* push the deadline */
853 end += timeout;
854 } else {
855 assert(rc == HW_LOCK_CONTENDED);
856 break;
857 }
858 }
859 }
860 }
861
862 end:
863 #if CONFIG_DTRACE || LOCK_STATS
864 if (__improbable(stat_enabled)) {
865 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
866 mach_absolute_time() - begin);
867 }
868 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
869 #endif /* CONFIG_DTRACE || LOCK_STATS */
870 return rc;
871 }
872
873 __result_use_check
874 static inline unsigned int
875 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint64_t timeout,
876 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
877 {
878 if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
879 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
880 return HW_LOCK_ACQUIRED;
881 }
882
883 return (unsigned)hw_lock_bit_to_contended(lock, bit, timeout, handler,
884 false LCK_GRP_ARG(grp));
885 }
886
887 /*
888 * Routine: hw_lock_bit_to
889 *
890 * Acquire bit lock, spinning until it becomes available or timeout.
891 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
892 * preemption disabled.
893 */
894 unsigned
895 int
896 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint64_t timeout,
897 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
898 {
899 _disable_preemption();
900 return hw_lock_bit_to_internal(lock, bit, timeout, handler LCK_GRP_ARG(grp));
901 }
902
903 /*
904 * Routine: hw_lock_bit
905 *
906 * Acquire bit lock, spinning until it becomes available,
907 * return with preemption disabled.
908 */
909 void
910 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
911 {
912 _disable_preemption();
913 (void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
914 }
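
/*
 * Illustrative bit-lock sketch (hypothetical field and bit number; the
 * lck_grp_t argument is assumed to come from the caller-side wrappers):
 * only the named bit is used for locking, so the other bits of the word
 * remain available for the caller's own state.
 *
 *	#define OBJ_LOCK_BIT	0
 *
 *	hw_lock_bit(&obj->flags, OBJ_LOCK_BIT, &my_grp);  // preemption disabled
 *	// update the remaining bits of obj->flags or nearby fields
 *	hw_unlock_bit(&obj->flags, OBJ_LOCK_BIT);         // preemption re-enabled
 */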
915
916 /*
917 * Routine: hw_lock_bit_nopreempt
918 *
919 * Acquire bit lock, spinning until it becomes available.
920 */
921 void
922 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
923 {
924 __lck_require_preemption_disabled(lock, current_thread());
925 (void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
926 }
927
928
929 hw_lock_status_t
930 (hw_lock_bit_to_allow_invalid)(hw_lock_bit_t * lock, unsigned int bit,
931 uint64_t timeout, hw_lock_timeout_handler_t handler
932 LCK_GRP_ARG(lck_grp_t *grp))
933 {
934 int rc;
935
936 _disable_preemption();
937
938 rc = hw_lock_trylock_mask_allow_invalid(lock, 1u << bit);
939 if (__probable(rc == HW_LOCK_ACQUIRED)) {
940 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
941 return HW_LOCK_ACQUIRED;
942 }
943
944 if (__probable(rc == HW_LOCK_CONTENDED)) {
945 return hw_lock_bit_to_contended(lock, bit, timeout, handler,
946 true LCK_GRP_ARG(grp));
947 }
948
949 lock_enable_preemption();
950 return HW_LOCK_INVALID;
951 }
952
953 unsigned
954 int
955 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
956 {
957 boolean_t success = false;
958
959 _disable_preemption();
960 success = hw_lock_trylock_bit(lock, bit, false);
961 if (!success) {
962 lock_enable_preemption();
963 }
964
965 if (success) {
966 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
967 }
968
969 return success;
970 }
971
972 static inline void
973 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
974 {
975 os_atomic_andnot(lock, 1u << bit, release);
976 #if __arm__
977 set_event();
978 #endif
979 #if CONFIG_DTRACE
980 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
981 #endif
982 }
983
984 /*
985 * Routine: hw_unlock_bit
986 *
987 * Release spin-lock. The second parameter is the bit number to clear.
988 * Decrement the preemption level.
989 */
990 void
991 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
992 {
993 hw_unlock_bit_internal(lock, bit);
994 lock_enable_preemption();
995 }
996
997 void
998 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
999 {
1000 __lck_require_preemption_disabled(lock, current_thread());
1001 hw_unlock_bit_internal(lock, bit);
1002 }
1003
1004 /*
1005 * Routine: lck_spin_sleep
1006 */
1007 wait_result_t
1008 lck_spin_sleep_grp(
1009 lck_spin_t *lck,
1010 lck_sleep_action_t lck_sleep_action,
1011 event_t event,
1012 wait_interrupt_t interruptible,
1013 lck_grp_t *grp)
1014 {
1015 wait_result_t res;
1016
1017 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1018 panic("Invalid lock sleep action %x", lck_sleep_action);
1019 }
1020
1021 res = assert_wait(event, interruptible);
1022 if (res == THREAD_WAITING) {
1023 lck_spin_unlock(lck);
1024 res = thread_block(THREAD_CONTINUE_NULL);
1025 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1026 lck_spin_lock_grp(lck, grp);
1027 }
1028 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1029 lck_spin_unlock(lck);
1030 }
1031
1032 return res;
1033 }
1034
1035 wait_result_t
1036 lck_spin_sleep(
1037 lck_spin_t *lck,
1038 lck_sleep_action_t lck_sleep_action,
1039 event_t event,
1040 wait_interrupt_t interruptible)
1041 {
1042 return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
1043 }
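
/*
 * Illustrative condition-wait sketch (hypothetical condition and event):
 * lck_spin_sleep() asserts the wait, drops the spin lock, blocks, and
 * (unless LCK_SLEEP_UNLOCK is passed) retakes the lock before returning,
 * so the condition must be rechecked in a loop.
 *
 *	lck_spin_lock(&obj->slock);
 *	while (!obj->ready) {
 *		(void) lck_spin_sleep(&obj->slock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&obj->ready, THREAD_UNINT);
 *	}
 *	// obj->ready is set and the lock is held again
 *	lck_spin_unlock(&obj->slock);
 */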
1044
1045 /*
1046 * Routine: lck_spin_sleep_deadline
1047 */
1048 wait_result_t
1049 lck_spin_sleep_deadline(
1050 lck_spin_t *lck,
1051 lck_sleep_action_t lck_sleep_action,
1052 event_t event,
1053 wait_interrupt_t interruptible,
1054 uint64_t deadline)
1055 {
1056 wait_result_t res;
1057
1058 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1059 panic("Invalid lock sleep action %x", lck_sleep_action);
1060 }
1061
1062 res = assert_wait_deadline(event, interruptible, deadline);
1063 if (res == THREAD_WAITING) {
1064 lck_spin_unlock(lck);
1065 res = thread_block(THREAD_CONTINUE_NULL);
1066 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1067 lck_spin_lock(lck);
1068 }
1069 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1070 lck_spin_unlock(lck);
1071 }
1072
1073 return res;
1074 }
1075
1076 /*
1077 * Routine: lck_mtx_sleep
1078 */
1079 wait_result_t
1080 lck_mtx_sleep(
1081 lck_mtx_t *lck,
1082 lck_sleep_action_t lck_sleep_action,
1083 event_t event,
1084 wait_interrupt_t interruptible)
1085 {
1086 wait_result_t res;
1087 thread_pri_floor_t token;
1088
1089 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1090 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1091
1092 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1093 panic("Invalid lock sleep action %x", lck_sleep_action);
1094 }
1095
1096 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1097 /*
1098 * We get a priority floor
1099 * during the time that this thread is asleep, so that when it
1100 * is re-awakened (and not yet contending on the mutex), it is
1101 * runnable at a reasonably high priority.
1102 */
1103 token = thread_priority_floor_start();
1104 }
1105
1106 res = assert_wait(event, interruptible);
1107 if (res == THREAD_WAITING) {
1108 lck_mtx_unlock(lck);
1109 res = thread_block(THREAD_CONTINUE_NULL);
1110 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1111 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1112 lck_mtx_lock_spin(lck);
1113 } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1114 lck_mtx_lock_spin_always(lck);
1115 } else {
1116 lck_mtx_lock(lck);
1117 }
1118 }
1119 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1120 lck_mtx_unlock(lck);
1121 }
1122
1123 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1124 thread_priority_floor_end(&token);
1125 }
1126
1127 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1128
1129 return res;
1130 }
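
/*
 * Illustrative sketch (hypothetical names): a waiter and a waker using
 * lck_mtx_sleep() with the default sleep action; thread_wakeup() is
 * assumed as the wakeup side.
 *
 *	// waiter
 *	lck_mtx_lock(&obj->mtx);
 *	while (!obj->done) {
 *		(void) lck_mtx_sleep(&obj->mtx, LCK_SLEEP_DEFAULT,
 *		    (event_t)&obj->done, THREAD_UNINT);
 *	}
 *	lck_mtx_unlock(&obj->mtx);
 *
 *	// waker
 *	lck_mtx_lock(&obj->mtx);
 *	obj->done = true;
 *	thread_wakeup((event_t)&obj->done);
 *	lck_mtx_unlock(&obj->mtx);
 */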
1131
1132
1133 /*
1134 * Routine: lck_mtx_sleep_deadline
1135 */
1136 wait_result_t
1137 lck_mtx_sleep_deadline(
1138 lck_mtx_t *lck,
1139 lck_sleep_action_t lck_sleep_action,
1140 event_t event,
1141 wait_interrupt_t interruptible,
1142 uint64_t deadline)
1143 {
1144 wait_result_t res;
1145 thread_pri_floor_t token;
1146
1147 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1148 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1149
1150 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1151 panic("Invalid lock sleep action %x", lck_sleep_action);
1152 }
1153
1154 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1155 /*
1156 * See lck_mtx_sleep().
1157 */
1158 token = thread_priority_floor_start();
1159 }
1160
1161 res = assert_wait_deadline(event, interruptible, deadline);
1162 if (res == THREAD_WAITING) {
1163 lck_mtx_unlock(lck);
1164 res = thread_block(THREAD_CONTINUE_NULL);
1165 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1166 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1167 lck_mtx_lock_spin(lck);
1168 } else {
1169 lck_mtx_lock(lck);
1170 }
1171 }
1172 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1173 lck_mtx_unlock(lck);
1174 }
1175
1176 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1177 thread_priority_floor_end(&token);
1178 }
1179
1180 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1181
1182 return res;
1183 }
1184
1185 /*
1186 * Lock Boosting Invariants:
1187 *
1188 * The lock owner is always promoted to the max priority of all its waiters.
1189 * Max priority is capped at MAXPRI_PROMOTE.
1190 *
1191 * The last waiter is not given a promotion when it wakes up or acquires the lock.
1192 * When the last waiter is waking up, a new contender can always come in and
1193 * steal the lock without having to wait for the last waiter to make forward progress.
1194 */
1195
1196 /*
1197 * Routine: lck_mtx_lock_wait
1198 *
1199 * Invoked in order to wait on contention.
1200 *
1201 * Called with the interlock locked and
1202 * returns it unlocked.
1203 *
1204 * Always aggressively sets the owning thread to promoted,
1205 * even if it's the same or higher priority.
1206 * This prevents it from lowering its own priority while holding a lock.
1207 *
1208 * TODO: Come up with a more efficient way to handle same-priority promotions
1209 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1210 */
1211 void
1212 lck_mtx_lock_wait(
1213 lck_mtx_t *lck,
1214 thread_t holder,
1215 struct turnstile **ts)
1216 {
1217 thread_t thread = current_thread();
1218 lck_mtx_t *mutex = lck;
1219 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1220
1221 #if CONFIG_DTRACE
1222 uint64_t sleep_start = 0;
1223
1224 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1225 sleep_start = mach_absolute_time();
1226 }
1227 #endif
1228
1229 #if LOCKS_INDIRECT_ALLOW
1230 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1231 mutex = &lck->lck_mtx_ptr->lck_mtx;
1232 }
1233 #endif /* LOCKS_INDIRECT_ALLOW */
1234
1235 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1236 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1237
1238 mutex->lck_mtx_waiters++;
1239
1240 if (*ts == NULL) {
1241 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1242 }
1243
1244 struct turnstile *turnstile = *ts;
1245 thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
1246 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1247
1248 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
1249
1250 lck_mtx_ilk_unlock(mutex);
1251
1252 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
1253
1254 thread_block(THREAD_CONTINUE_NULL);
1255
1256 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1257 #if CONFIG_DTRACE
1258 /*
1259 * Record the DTrace lockstat probe for blocking, block time
1260 * measured from when we were entered.
1261 */
1262 if (sleep_start) {
1263 #if LOCKS_INDIRECT_ALLOW
1264 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
1265 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1266 mach_absolute_time() - sleep_start);
1267 } else
1268 #endif /* LOCKS_INDIRECT_ALLOW */
1269 {
1270 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1271 mach_absolute_time() - sleep_start);
1272 }
1273 }
1274 #endif
1275 }
1276
1277 /*
1278 * Routine: lck_mtx_lock_acquire
1279 *
1280 * Invoked on acquiring the mutex when there is
1281 * contention.
1282 *
1283 * Returns the current number of waiters.
1284 *
1285 * Called with the interlock locked.
1286 */
1287 int
1288 lck_mtx_lock_acquire(
1289 lck_mtx_t *lck,
1290 struct turnstile *ts)
1291 {
1292 thread_t thread = current_thread();
1293 lck_mtx_t *mutex = lck;
1294
1295 #if LOCKS_INDIRECT_ALLOW
1296 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1297 mutex = &lck->lck_mtx_ptr->lck_mtx;
1298 }
1299 #endif /* LOCKS_INDIRECT_ALLOW */
1300
1301 if (mutex->lck_mtx_waiters > 0) {
1302 if (ts == NULL) {
1303 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1304 }
1305
1306 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1307 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1308 }
1309
1310 if (ts != NULL) {
1311 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1312 }
1313
1314 return mutex->lck_mtx_waiters;
1315 }
1316
1317 /*
1318 * Routine: lck_mtx_unlock_wakeup
1319 *
1320 * Invoked on unlock when there is contention.
1321 *
1322 * Called with the interlock locked.
1323 *
1324 * NOTE: callers should call turnstile_cleanup after
1325 * dropping the interlock.
1326 */
1327 boolean_t
1328 lck_mtx_unlock_wakeup(
1329 lck_mtx_t *lck,
1330 thread_t holder)
1331 {
1332 thread_t thread = current_thread();
1333 lck_mtx_t *mutex = lck;
1334 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1335 struct turnstile *ts;
1336 kern_return_t did_wake;
1337
1338 #if LOCKS_INDIRECT_ALLOW
1339 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1340 mutex = &lck->lck_mtx_ptr->lck_mtx;
1341 }
1342 #endif /* LOCKS_INDIRECT_ALLOW */
1343
1344
1345 if (thread != holder) {
1346 panic("lck_mtx_unlock_wakeup: mutex %p holder %p", mutex, holder);
1347 }
1348
1349 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1350 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1351
1352 assert(mutex->lck_mtx_waiters > 0);
1353
1354 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1355
1356 if (mutex->lck_mtx_waiters > 1) {
1357 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1358 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1359 } else {
1360 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1361 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1362 }
1363 assert(did_wake == KERN_SUCCESS);
1364
1365 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1366 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1367
1368 mutex->lck_mtx_waiters--;
1369
1370 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1371
1372 return mutex->lck_mtx_waiters > 0;
1373 }
1374
1375 /*
1376 * Routine: mutex_pause
1377 *
1378 * Called by former callers of simple_lock_pause().
1379 */
1380 #define MAX_COLLISION_COUNTS 32
1381 #define MAX_COLLISION 8
1382
1383 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1384
1385 uint32_t collision_backoffs[MAX_COLLISION] = {
1386 10, 50, 100, 200, 400, 600, 800, 1000
1387 };
1388
1389
1390 void
1391 mutex_pause(uint32_t collisions)
1392 {
1393 wait_result_t wait_result;
1394 uint32_t back_off;
1395
1396 if (collisions >= MAX_COLLISION_COUNTS) {
1397 collisions = MAX_COLLISION_COUNTS - 1;
1398 }
1399 max_collision_count[collisions]++;
1400
1401 if (collisions >= MAX_COLLISION) {
1402 collisions = MAX_COLLISION - 1;
1403 }
1404 back_off = collision_backoffs[collisions];
1405
1406 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1407 assert(wait_result == THREAD_WAITING);
1408
1409 wait_result = thread_block(THREAD_CONTINUE_NULL);
1410 assert(wait_result == THREAD_TIMED_OUT);
1411 }
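
/*
 * Illustrative retry-loop sketch (hypothetical try-function): callers pass
 * an escalating collision count; mutex_pause() clamps it into the backoff
 * table above and sleeps between 10us and 1000us before returning.
 *
 *	uint32_t collisions = 0;
 *
 *	while (!try_grab_resource(obj)) {
 *		mutex_pause(collisions++);
 *	}
 */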
1412
1413
1414 unsigned int mutex_yield_wait = 0;
1415 unsigned int mutex_yield_no_wait = 0;
1416
1417 void
1418 lck_mtx_yield(
1419 lck_mtx_t *lck)
1420 {
1421 int waiters;
1422
1423 #if DEBUG
1424 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1425 #endif /* DEBUG */
1426
1427 #if LOCKS_INDIRECT_ALLOW
1428 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1429 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1430 } else
1431 #endif /* LOCKS_INDIRECT_ALLOW */
1432 {
1433 waiters = lck->lck_mtx_waiters;
1434 }
1435
1436 if (!waiters) {
1437 mutex_yield_no_wait++;
1438 } else {
1439 mutex_yield_wait++;
1440 lck_mtx_unlock(lck);
1441 mutex_pause(0);
1442 lck_mtx_lock(lck);
1443 }
1444 }
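
/*
 * Illustrative sketch (hypothetical loop body): a long-running mutex holder
 * calls lck_mtx_yield() between iterations so waiters get a chance to take
 * the lock; the mutex is only dropped and retaken when waiters exist, so
 * the caller must not assume state protected by the lock stays unchanged
 * across the call.
 *
 *	lck_mtx_lock(&obj->mtx);
 *	while (obj_has_work(obj)) {
 *		// process one element under the mutex
 *		lck_mtx_yield(&obj->mtx);
 *	}
 *	lck_mtx_unlock(&obj->mtx);
 */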
1445
1446 /*
1447 * sleep_with_inheritor and wakeup_with_inheritor KPI
1448 *
1449 * Functions that allow a thread to sleep on an event and use a turnstile to propagate the priority of the sleeping threads to
1450 * the latest thread specified as inheritor.
1451 *
1452 * The inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions, specifying upon whom to
1453 * direct the push. The inheritor cannot run in user space while holding a push from an event. Therefore it is the caller's responsibility to call
1454 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1455 *
1456 * sleep_with_inheritor requires a locking primitive to be held when invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1457 *
1458 * Turnstiles require a non-blocking primitive as interlock to synchronize manipulation of the turnstile data structures; therefore sleep_with_inheritor, change_sleep_inheritor and
1459 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
1460 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
1461 * invoking any turnstile operation.
1462 *
1463 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
1464 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
1465 * is instantiated for this KPI to manage the hash without interrupts disabled.
1466 * Also:
1467 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1468 * - every event will have a dedicated wait_queue.
1469 *
1470 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1471 * sleep_with_inheritor_and_turnstile_type() to perform the handoff with the bucket spinlock.
1472 */
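
/*
 * Illustrative shape of the KPI (hypothetical object, states and owner
 * tracking; lck_mtx_sleep_with_inheritor() and wakeup_one_with_inheritor()
 * are assumed here to be the matching entry points declared in the locks
 * headers):
 *
 *	// waiter: sleeps on &obj->state and pushes on the current owner
 *	lck_mtx_lock(&obj->mtx);
 *	while (obj->state == OBJ_BUSY) {
 *		(void) lck_mtx_sleep_with_inheritor(&obj->mtx, LCK_SLEEP_DEFAULT,
 *		    (event_t)&obj->state, obj->owner_thread,
 *		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *	}
 *	...
 *	lck_mtx_unlock(&obj->mtx);
 *
 *	// inheritor: must wake (or hand the push to a new inheritor)
 *	// before returning to user space
 *	obj->state = OBJ_IDLE;
 *	(void) wakeup_one_with_inheritor((event_t)&obj->state, THREAD_AWAKENED,
 *	    LCK_WAKE_DEFAULT, NULL);
 */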
1473
1474 kern_return_t
1475 wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
1476 {
1477 uint32_t index;
1478 struct turnstile *ts = NULL;
1479 kern_return_t ret = KERN_NOT_WAITING;
1480 int priority;
1481 thread_t wokeup;
1482
1483 /*
1484 * the hash bucket spinlock is used as turnstile interlock
1485 */
1486 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1487
1488 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1489
1490 if (wake_one) {
1491 if (action == LCK_WAKE_DEFAULT) {
1492 priority = WAITQ_PROMOTE_ON_WAKE;
1493 } else {
1494 assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
1495 priority = WAITQ_ALL_PRIORITIES;
1496 }
1497
1498 /*
1499 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
1500 * if it finds a thread
1501 */
1502 wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
1503 if (wokeup != NULL) {
1504 if (thread_wokenup != NULL) {
1505 *thread_wokenup = wokeup;
1506 } else {
1507 thread_deallocate_safe(wokeup);
1508 }
1509 ret = KERN_SUCCESS;
1510 if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
1511 goto complete;
1512 }
1513 } else {
1514 if (thread_wokenup != NULL) {
1515 *thread_wokenup = NULL;
1516 }
1517 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1518 ret = KERN_NOT_WAITING;
1519 }
1520 } else {
1521 ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
1522 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1523 }
1524
1525 /*
1526 * turnstile_update_inheritor_complete could be called while holding the interlock.
1527 * In this case the new inheritor either is NULL, or is a thread that has just been woken up
1528 * and has not blocked yet, because it is racing with the same interlock used here
1529 * after the wait.
1530 * So there is no chain to update for the new inheritor.
1531 *
1532 * However, unless the current thread is the old inheritor,
1533 * the old inheritor can be blocked and requires a chain update.
1534 *
1535 * The chain should be short because kernel turnstiles cannot have user turnstiles
1536 * chained after them.
1537 *
1538 * We could anyway optimize this by asking the turnstile to tell us
1539 * whether the old inheritor needs an update, and drop the lock
1540 * only in that case.
1541 */
1542 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1543
1544 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1545
1546 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1547
1548 complete:
1549 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1550
1551 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1552
1553 turnstile_cleanup();
1554
1555 return ret;
1556 }
1557
1558 static wait_result_t
1559 sleep_with_inheritor_and_turnstile_type(event_t event,
1560 thread_t inheritor,
1561 wait_interrupt_t interruptible,
1562 uint64_t deadline,
1563 turnstile_type_t type,
1564 void (^primitive_lock)(void),
1565 void (^primitive_unlock)(void))
1566 {
1567 wait_result_t ret;
1568 uint32_t index;
1569 struct turnstile *ts = NULL;
1570
1571 /*
1572 * the hash bucket spinlock is used as turnstile interlock,
1573 * lock it before releasing the primitive lock
1574 */
1575 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1576
1577 primitive_unlock();
1578
1579 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1580
1581 thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
1582 /*
1583 * We need TURNSTILE_DELAYED_UPDATE because we will call
1584 * waitq_assert_wait64 after.
1585 */
1586 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1587
1588 ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);
1589
1590 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1591
1592 /*
1593 * Update new and old inheritor chains outside the interlock;
1594 */
1595 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1596
1597 if (ret == THREAD_WAITING) {
1598 ret = thread_block(THREAD_CONTINUE_NULL);
1599 }
1600
1601 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1602
1603 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1604
1605 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1606
1607 turnstile_cleanup();
1608
1609 primitive_lock();
1610
1611 return ret;
1612 }
1613
1614 kern_return_t
1615 change_sleep_inheritor_and_turnstile_type(event_t event,
1616 thread_t inheritor,
1617 turnstile_type_t type)
1618 {
1619 uint32_t index;
1620 struct turnstile *ts = NULL;
1621 kern_return_t ret = KERN_SUCCESS;
1622 /*
1623 * the hash bucket spinlock is used as turnstile interlock
1624 */
1625 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1626
1627 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1628
1629 if (!turnstile_has_waiters(ts)) {
1630 ret = KERN_NOT_WAITING;
1631 }
1632
1633 /*
1634 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1635 */
1636 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1637
1638 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1639
1640 /*
1641 * update the chains outside the interlock
1642 */
1643 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1644
1645 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1646
1647 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1648
1649 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1650
1651 turnstile_cleanup();
1652
1653 return ret;
1654 }
1655
1656 typedef void (^void_block_void)(void);
1657
1658 /*
1659 * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1660 */
1661
1662 wait_result_t
1663 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1664 {
1665 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1666
1667 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1668 return sleep_with_inheritor_and_turnstile_type(event,
1669 inheritor,
1670 interruptible,
1671 deadline,
1672 type,
1673 ^{;},
1674 ^{lck_mtx_unlock(lock);});
1675 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1676 return sleep_with_inheritor_and_turnstile_type(event,
1677 inheritor,
1678 interruptible,
1679 deadline,
1680 type,
1681 ^{lck_mtx_lock_spin(lock);},
1682 ^{lck_mtx_unlock(lock);});
1683 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1684 return sleep_with_inheritor_and_turnstile_type(event,
1685 inheritor,
1686 interruptible,
1687 deadline,
1688 type,
1689 ^{lck_mtx_lock_spin_always(lock);},
1690 ^{lck_mtx_unlock(lock);});
1691 } else {
1692 return sleep_with_inheritor_and_turnstile_type(event,
1693 inheritor,
1694 interruptible,
1695 deadline,
1696 type,
1697 ^{lck_mtx_lock(lock);},
1698 ^{lck_mtx_unlock(lock);});
1699 }
1700 }
1701
1702 /*
1703 * Name: lck_spin_sleep_with_inheritor
1704 *
1705 * Description: deschedules the current thread and waits on the waitq associated with the event until it is woken up.
1706 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1707 * be directed to the inheritor specified.
1708 * An interruptible mode and deadline can be specified to return earlier from the wait.
1709 *
1710 * Args:
1711 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1712 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1713 * Arg3: event to wait on.
1714 * Arg4: thread to propagate the event push to.
1715 * Arg5: interruptible flag for wait.
1716 * Arg6: deadline for wait.
1717 *
1718 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1719 * Lock will be dropped while waiting.
1720 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1721 * wakeup for the event is called.
1722 *
1723 * Returns: result of the wait.
1724 */
1725 wait_result_t
1726 lck_spin_sleep_with_inheritor(
1727 lck_spin_t *lock,
1728 lck_sleep_action_t lck_sleep_action,
1729 event_t event,
1730 thread_t inheritor,
1731 wait_interrupt_t interruptible,
1732 uint64_t deadline)
1733 {
1734 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1735 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1736 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1737 ^{}, ^{ lck_spin_unlock(lock); });
1738 } else {
1739 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1740 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1741 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1742 }
1743 }
1744
1745 /*
1746 * Name: lck_ticket_sleep_with_inheritor
1747 *
1748 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1749 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1750 * be directed to the inheritor specified.
1751 * An interruptible mode and deadline can be specified to return earlier from the wait.
1752 *
1753 * Args:
1754 * Arg1: lck_ticket_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1755 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1756 * Arg3: event to wait on.
1757 * Arg4: thread to propagate the event push to.
1758 * Arg5: interruptible flag for wait.
1759 * Arg6: deadline for wait.
1760 *
1761 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1762 * Lock will be dropped while waiting.
1763 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1764 * wakeup for the event is called.
1765 *
1766 * Returns: result of the wait.
1767 */
1768 wait_result_t
1769 lck_ticket_sleep_with_inheritor(
1770 lck_ticket_t *lock,
1771 lck_grp_t *grp,
1772 lck_sleep_action_t lck_sleep_action,
1773 event_t event,
1774 thread_t inheritor,
1775 wait_interrupt_t interruptible,
1776 uint64_t deadline)
1777 {
1778 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1779 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1780 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1781 ^{}, ^{ lck_ticket_unlock(lock); });
1782 } else {
1783 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1784 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1785 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1786 }
1787 }
1788
1789 /*
1790 * Name: lck_mtx_sleep_with_inheritor
1791 *
1792 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1793 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1794 * be directed to the inheritor specified.
1795 * An interruptible mode and deadline can be specified to return earlier from the wait.
1796 *
1797 * Args:
1798 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1799 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
1800 * Arg3: event to wait on.
1801 * Arg4: thread to propagate the event push to.
1802 * Arg5: interruptible flag for wait.
1803 * Arg6: deadline for wait.
1804 *
1805 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1806 * Lock will be dropped while waiting.
1807 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1808 * wakeup for the event is called.
1809 *
1810 * Returns: result of the wait.
1811 */
1812 wait_result_t
1813 lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
1814 {
1815 return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1816 }
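
/*
 * Example (illustrative sketch, not part of the original source): the sleep
 * side of a priority-inheriting wait on a mutex-protected resource. The
 * waiter names the thread currently responsible for progress as the
 * inheritor, so its scheduling priority pushes on that thread while it
 * sleeps. Everything prefixed with `my_` is hypothetical caller state; the
 * matching wake side is sketched after wakeup_one_with_inheritor() below.
 *
 *	lck_mtx_lock(&my_res->mtx);
 *	while (my_res->busy) {
 *		// The mutex is dropped while asleep and re-taken before
 *		// returning; a deadline of 0 means no timeout.
 *		(void) lck_mtx_sleep_with_inheritor(&my_res->mtx, LCK_SLEEP_DEFAULT,
 *		    (event_t)my_res, my_res->owner, THREAD_UNINT, 0);
 *	}
 *	my_res->busy = true;
 *	my_res->owner = current_thread();
 *	lck_mtx_unlock(&my_res->mtx);
 */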
1817
1818 /*
1819 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1820 */
1821
1822 wait_result_t
1823 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1824 {
1825 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1826
1827 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1828
1829 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1830 return sleep_with_inheritor_and_turnstile_type(event,
1831 inheritor,
1832 interruptible,
1833 deadline,
1834 type,
1835 ^{;},
1836 ^{lck_rw_type = lck_rw_done(lock);});
1837 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1838 return sleep_with_inheritor_and_turnstile_type(event,
1839 inheritor,
1840 interruptible,
1841 deadline,
1842 type,
1843 ^{lck_rw_lock(lock, lck_rw_type);},
1844 ^{lck_rw_type = lck_rw_done(lock);});
1845 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1846 return sleep_with_inheritor_and_turnstile_type(event,
1847 inheritor,
1848 interruptible,
1849 deadline,
1850 type,
1851 ^{lck_rw_lock_exclusive(lock);},
1852 ^{lck_rw_type = lck_rw_done(lock);});
1853 } else {
1854 return sleep_with_inheritor_and_turnstile_type(event,
1855 inheritor,
1856 interruptible,
1857 deadline,
1858 type,
1859 ^{lck_rw_lock_shared(lock);},
1860 ^{lck_rw_type = lck_rw_done(lock);});
1861 }
1862 }
1863
1864 /*
1865 * Name: lck_rw_sleep_with_inheritor
1866 *
1867 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1868 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1869 * be directed to the inheritor specified.
1870 * An interruptible mode and deadline can be specified to return earlier from the wait.
1871 *
1872 * Args:
1873 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1874 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
1875 * Arg3: event to wait on.
1876 * Arg4: thread to propagate the event push to.
1877 * Arg5: interruptible flag for wait.
1878 * Arg6: deadline for wait.
1879 *
1880 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1881 * Lock will be dropped while waiting.
1882 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1883 * wakeup for the event is called.
1884 *
1885 * Returns: result of the wait.
1886 */
1887 wait_result_t
1888 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
1889 {
1890 return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1891 }
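
/*
 * Example (illustrative sketch, not part of the original source): sleeping
 * under an rw lock held in shared mode and re-acquiring it shared on wakeup.
 * `my_obj` is hypothetical caller state; its `writer` field names the thread
 * the waiters should push on.
 *
 *	lck_rw_lock_shared(&my_obj->rwlock);
 *	while (my_obj->being_rewritten) {
 *		(void) lck_rw_sleep_with_inheritor(&my_obj->rwlock, LCK_SLEEP_SHARED,
 *		    (event_t)my_obj, my_obj->writer, THREAD_UNINT, 0);
 *	}
 *	... read my_obj ...
 *	lck_rw_done(&my_obj->rwlock);
 */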
1892
1893 /*
1894 * wakeup_with_inheritor functions are independent from the locking primitive.
1895 */
1896
1897 /*
1898 * Name: wakeup_one_with_inheritor
1899 *
1900 * Description: wake up one waiter for event if any. The thread woken up will be the one with the highest sched priority waiting on event.
1901 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
1902 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
1903 *
1904 * Args:
1905 * Arg1: event to wake from.
1906 * Arg2: wait result to pass to the woken up thread.
1907 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
1908 * Arg4: pointer for storing the woken-up thread.
1909 *
1910 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1911 *
1912 * Conditions: The new inheritor woken up cannot run in user space until another inheritor is specified for the event or a
1913 * wakeup for the event is called.
1914 * A reference for the woken-up thread is acquired; the caller is responsible for releasing it.
1915 * NOTE: this cannot be called from interrupt context.
1916 */
1917 kern_return_t
1918 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1919 {
1920 return wakeup_with_inheritor_and_turnstile_type(event,
1921 TURNSTILE_SLEEP_INHERITOR,
1922 result,
1923 TRUE,
1924 action,
1925 thread_wokenup);
1926 }
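
/*
 * Example (illustrative sketch, not part of the original source): the wake
 * side matching the lck_mtx_sleep_with_inheritor() sketch above. With
 * LCK_WAKE_DEFAULT the push is transferred to the woken thread and a
 * reference on it is returned through the out-parameter; the caller owns
 * that reference and drops it with thread_deallocate(). `my_res` is the same
 * hypothetical resource as above.
 *
 *	thread_t woken = THREAD_NULL;
 *
 *	lck_mtx_lock(&my_res->mtx);
 *	my_res->busy = false;
 *	my_res->owner = THREAD_NULL;
 *	lck_mtx_unlock(&my_res->mtx);
 *	if (wakeup_one_with_inheritor((event_t)my_res, THREAD_AWAKENED,
 *	    LCK_WAKE_DEFAULT, &woken) == KERN_SUCCESS) {
 *		thread_deallocate(woken);	// drop the reference we were handed
 *	}
 */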
1927
1928 /*
1929 * Name: wakeup_all_with_inheritor
1930 *
1931 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
1932 *
1933 * Args:
1934 * Arg1: event to wake from.
1935 * Arg2: wait result to pass to the woken up threads.
1936 *
1937 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1938 *
1939 * Conditions: NOTE: this cannot be called from interrupt context.
1940 */
1941 kern_return_t
1942 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1943 {
1944 return wakeup_with_inheritor_and_turnstile_type(event,
1945 TURNSTILE_SLEEP_INHERITOR,
1946 result,
1947 FALSE,
1948 0,
1949 NULL);
1950 }
1951
1952 /*
1953 * change_sleep_inheritor is independent from the locking primitive.
1954 */
1955
1956 /*
1957 * Name: change_sleep_inheritor
1958 *
1959 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1960 *
1961 * Args:
1962 * Arg1: event to redirect the push.
1963 * Arg2: new inheritor for event.
1964 *
1965 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1966 *
1967 * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
1968 * wakeup for the event is called.
1969 * NOTE: this cannot be called from interrupt context.
1970 */
1971 kern_return_t
1972 change_sleep_inheritor(event_t event, thread_t inheritor)
1973 {
1974 return change_sleep_inheritor_and_turnstile_type(event,
1975 inheritor,
1976 TURNSTILE_SLEEP_INHERITOR);
1977 }
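
/*
 * Example (illustrative sketch, not part of the original source): redirecting
 * the push when responsibility for an event moves between threads, e.g. when
 * a work item tracked by a hypothetical `my_work` structure is re-assigned
 * while waiters are still blocked on it in *_sleep_with_inheritor().
 *
 *	my_work->servicing_thread = new_thread;
 *	if (change_sleep_inheritor((event_t)my_work, new_thread) == KERN_NOT_WAITING) {
 *		// Nobody is blocked on my_work; nothing was redirected.
 *	}
 */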
1978
1979 void
1980 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
1981 {
1982 assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
1983 assert(waitq_type(waitq) == WQT_TURNSTILE);
1984 waitinfo->owner = 0;
1985 waitinfo->context = 0;
1986
1987 if (waitq_held(waitq)) {
1988 return;
1989 }
1990
1991 struct turnstile *turnstile = waitq_to_turnstile(waitq);
1992 assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
1993 waitinfo->owner = thread_tid(turnstile->ts_inheritor);
1994 }
1995
1996 #define GATE_TYPE 3
1997 #define GATE_ILOCK_BIT 0
1998 #define GATE_WAITERS_BIT 1
1999
2000 #define GATE_ILOCK (1 << GATE_ILOCK_BIT)
2001 #define GATE_WAITERS (1 << GATE_WAITERS_BIT)
2002
2003 #define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
2004 #define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
2005 #define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
2006 #define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
2007 #define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)
2008
2009 #define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
2010 #define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
2011 #define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
2012 #define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))
2013
2014 #define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)
2015
2016 #define GATE_EVENT(gate) ((event_t) gate)
2017 #define EVENT_TO_GATE(event) ((gate_t *) event)
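
/*
 * Gate state word layout (worked example, added for clarity): gt_data packs
 * the holder thread pointer together with the two low bits, which are always
 * clear in a thread_t because of pointer alignment. For instance, with a
 * hypothetical holder at 0xffffff801234a000, a waiter present, and the
 * interlock taken:
 *
 *	state = GATE_THREAD_TO_STATE(holder) | GATE_WAITERS | GATE_ILOCK
 *	      = 0xffffff801234a003
 *	GATE_STATE_TO_THREAD(state) -> 0xffffff801234a000 (the holder)
 *	gate_has_waiter_bit(state)  -> true
 */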
2018
2019 typedef void (*void_func_void)(void);
2020
2021 __abortlike
2022 static void
2023 gate_verify_tag_panic(gate_t *gate)
2024 {
2025 panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2026 }
2027
2028 __abortlike
2029 static void
2030 gate_verify_destroy_panic(gate_t *gate)
2031 {
2032 panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2033 }
2034
2035 static void
2036 gate_verify(gate_t *gate)
2037 {
2038 if (gate->gt_type != GATE_TYPE) {
2039 gate_verify_tag_panic(gate);
2040 }
2041 if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
2042 gate_verify_destroy_panic(gate);
2043 }
2044
2045 assert(gate->gt_refs > 0);
2046 }
2047
2048 __abortlike
2049 static void
2050 gate_already_owned_panic(gate_t *gate, thread_t holder)
2051 {
2052 panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
2053 }
2054
2055 static kern_return_t
2056 gate_try_close(gate_t *gate)
2057 {
2058 uintptr_t state;
2059 thread_t holder;
2060 kern_return_t ret;
2061 thread_t thread = current_thread();
2062
2063 gate_verify(gate);
2064
2065 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2066 return KERN_SUCCESS;
2067 }
2068
2069 gate_ilock(gate);
2070 state = ordered_load_gate(gate);
2071 holder = GATE_STATE_TO_THREAD(state);
2072
2073 if (holder == NULL) {
2074 assert(gate_has_waiter_bit(state) == FALSE);
2075
2076 state = GATE_THREAD_TO_STATE(current_thread());
2077 state |= GATE_ILOCK;
2078 ordered_store_gate(gate, state);
2079 ret = KERN_SUCCESS;
2080 } else {
2081 if (holder == current_thread()) {
2082 gate_already_owned_panic(gate, holder);
2083 }
2084 ret = KERN_FAILURE;
2085 }
2086
2087 gate_iunlock(gate);
2088 return ret;
2089 }
2090
2091 static void
2092 gate_close(gate_t* gate)
2093 {
2094 uintptr_t state;
2095 thread_t holder;
2096 thread_t thread = current_thread();
2097
2098 gate_verify(gate);
2099
2100 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2101 return;
2102 }
2103
2104 gate_ilock(gate);
2105 state = ordered_load_gate(gate);
2106 holder = GATE_STATE_TO_THREAD(state);
2107
2108 if (holder != NULL) {
2109 gate_already_owned_panic(gate, holder);
2110 }
2111
2112 assert(gate_has_waiter_bit(state) == FALSE);
2113
2114 state = GATE_THREAD_TO_STATE(thread);
2115 state |= GATE_ILOCK;
2116 ordered_store_gate(gate, state);
2117
2118 gate_iunlock(gate);
2119 }
2120
2121 static void
2122 gate_open_turnstile(gate_t *gate)
2123 {
2124 struct turnstile *ts = NULL;
2125
2126 ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2127 waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2128 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2129 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2130 turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2131 /*
2132 * We can do the cleanup while holding the interlock.
2133 * It is ok because:
2134 * 1. current_thread is the previous inheritor and it is running
2135 * 2. new inheritor is NULL.
2136 * => No chain of turnstiles needs to be updated.
2137 */
2138 turnstile_cleanup();
2139 }
2140
2141 __abortlike
2142 static void
2143 gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
2144 {
2145 if (open) {
2146 panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2147 } else {
2148 panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2149 }
2150 }
2151
2152 static void
2153 gate_open(gate_t *gate)
2154 {
2155 uintptr_t state;
2156 thread_t holder;
2157 bool waiters;
2158 thread_t thread = current_thread();
2159
2160 gate_verify(gate);
2161 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2162 return;
2163 }
2164
2165 gate_ilock(gate);
2166 state = ordered_load_gate(gate);
2167 holder = GATE_STATE_TO_THREAD(state);
2168 waiters = gate_has_waiter_bit(state);
2169
2170 if (holder != thread) {
2171 gate_not_owned_panic(gate, holder, true);
2172 }
2173
2174 if (waiters) {
2175 gate_open_turnstile(gate);
2176 }
2177
2178 state = GATE_ILOCK;
2179 ordered_store_gate(gate, state);
2180
2181 gate_iunlock(gate);
2182 }
2183
2184 static kern_return_t
2185 gate_handoff_turnstile(gate_t *gate,
2186 int flags,
2187 thread_t *thread_woken_up,
2188 bool *waiters)
2189 {
2190 struct turnstile *ts = NULL;
2191 kern_return_t ret = KERN_FAILURE;
2192 thread_t hp_thread;
2193
2194 ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2195 /*
2196 * Wake up the highest priority thread waiting on the gate
2197 */
2198 hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
2199
2200 if (hp_thread != NULL) {
2201 /*
2202 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
2203 */
2204 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2205 *thread_woken_up = hp_thread;
2206 *waiters = turnstile_has_waiters(ts);
2207 /*
2208 * Note: hp_thread is the new holder and the new inheritor.
2209 * In case there are no more waiters, it doesn't need to be the inheritor
2210 * and it shouldn't be it by the time it finishes the wait, so that its next open or
2211 * handoff can go through the fast path.
2212 * We could set the inheritor to NULL here, or the new holder itself can set it
2213 * on its way back from the sleep. In the latter case there is a better chance that
2214 * new waiters will have arrived by then, avoiding the operation altogether.
2215 */
2216 ret = KERN_SUCCESS;
2217 } else {
2218 /*
2219 * Waiters may have been woken up by an interrupt and not yet
2220 * have updated gate->waiters, so we could not find them on the waitq.
2221 * Update the inheritor to NULL here, so that the current thread can return to userspace
2222 * independently of when the interrupted waiters finish their wait.
2223 */
2224 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2225 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2226 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2227 }
2228 // there are no waiters.
2229 ret = KERN_NOT_WAITING;
2230 }
2231
2232 turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2233
2234 /*
2235 * We can do the cleanup while holding the interlock.
2236 * It is ok because:
2237 * 1. current_thread is the previous inheritor and it is running
2238 * 2. new inheritor is NULL or it is a just-woken-up thread that will race to acquire the
2239 * interlock of the gate before trying to sleep.
2240 * => No chain of turnstiles needs to be updated.
2241 */
2242 turnstile_cleanup();
2243
2244 return ret;
2245 }
2246
2247 static kern_return_t
2248 gate_handoff(gate_t *gate,
2249 int flags)
2250 {
2251 kern_return_t ret;
2252 thread_t new_holder = NULL;
2253 uintptr_t state;
2254 thread_t holder;
2255 bool waiters;
2256 thread_t thread = current_thread();
2257
2258 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2259 gate_verify(gate);
2260
2261 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2262 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2263 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2264 return KERN_NOT_WAITING;
2265 }
2266 }
2267
2268 gate_ilock(gate);
2269 state = ordered_load_gate(gate);
2270 holder = GATE_STATE_TO_THREAD(state);
2271 waiters = gate_has_waiter_bit(state);
2272
2273 if (holder != current_thread()) {
2274 gate_not_owned_panic(gate, holder, false);
2275 }
2276
2277 if (waiters) {
2278 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2279 if (ret == KERN_SUCCESS) {
2280 state = GATE_THREAD_TO_STATE(new_holder);
2281 if (waiters) {
2282 state |= GATE_WAITERS;
2283 }
2284 } else {
2285 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2286 state = 0;
2287 }
2288 }
2289 } else {
2290 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2291 state = 0;
2292 }
2293 ret = KERN_NOT_WAITING;
2294 }
2295 state |= GATE_ILOCK;
2296 ordered_store_gate(gate, state);
2297
2298 gate_iunlock(gate);
2299
2300 if (new_holder) {
2301 thread_deallocate(new_holder);
2302 }
2303 return ret;
2304 }
2305
2306 static void_func_void
2307 gate_steal_turnstile(gate_t *gate,
2308 thread_t new_inheritor)
2309 {
2310 struct turnstile *ts = NULL;
2311
2312 ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2313
2314 turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2315 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2316 turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2317
2318 /*
2319 * turnstile_cleanup might need to update the chain of the old holder.
2320 * This operation should happen without the turnstile interlock held.
2321 */
2322 return turnstile_cleanup;
2323 }
2324
2325 __abortlike
2326 static void
2327 gate_not_closed_panic(gate_t *gate, bool wait)
2328 {
2329 if (wait) {
2330 panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
2331 } else {
2332 panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
2333 }
2334 }
2335
2336 static void
2337 gate_steal(gate_t *gate)
2338 {
2339 uintptr_t state;
2340 thread_t holder;
2341 thread_t thread = current_thread();
2342 bool waiters;
2343
2344 void_func_void func_after_interlock_unlock;
2345
2346 gate_verify(gate);
2347
2348 gate_ilock(gate);
2349 state = ordered_load_gate(gate);
2350 holder = GATE_STATE_TO_THREAD(state);
2351 waiters = gate_has_waiter_bit(state);
2352
2353 if (holder == NULL) {
2354 gate_not_closed_panic(gate, false);
2355 }
2356
2357 state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
2358 if (waiters) {
2359 state |= GATE_WAITERS;
2360 ordered_store_gate(gate, state);
2361 func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
2362 gate_iunlock(gate);
2363
2364 func_after_interlock_unlock();
2365 } else {
2366 ordered_store_gate(gate, state);
2367 gate_iunlock(gate);
2368 }
2369 }
2370
2371 static void_func_void
2372 gate_wait_turnstile(gate_t *gate,
2373 wait_interrupt_t interruptible,
2374 uint64_t deadline,
2375 thread_t holder,
2376 wait_result_t* wait,
2377 bool* waiters)
2378 {
2379 struct turnstile *ts;
2380 uintptr_t state;
2381
2382 ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2383
2384 turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2385 waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);
2386
2387 gate_iunlock(gate);
2388
2389 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2390
2391 *wait = thread_block(THREAD_CONTINUE_NULL);
2392
2393 gate_ilock(gate);
2394
2395 *waiters = turnstile_has_waiters(ts);
2396
2397 if (!*waiters) {
2398 /*
2399 * We want to enable the fast path as soon as we see that there are no more waiters.
2400 * On the fast path the holder will not do any turnstile operations.
2401 * Set the inheritor as NULL here.
2402 *
2403 * NOTE: if it was an open operation that woke this thread up, the inheritor has
2404 * already been set to NULL.
2405 */
2406 state = ordered_load_gate(gate);
2407 holder = GATE_STATE_TO_THREAD(state);
2408 if (holder &&
2409 ((*wait != THREAD_AWAKENED) || // thread interrupted or timed out
2410 holder == current_thread())) { // thread was woken up and it is the new holder
2411 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2412 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2413 }
2414 }
2415
2416 turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2417
2418 /*
2419 * turnstile_cleanup might need to update the chain of the old holder.
2420 * This operation should happen without the turnstile primitive interlock held.
2421 */
2422 return turnstile_cleanup;
2423 }
2424
2425 static void
2426 gate_free_internal(gate_t *gate)
2427 {
2428 zfree(KT_GATE, gate);
2429 }
2430
2431 __abortlike
2432 static void
2433 gate_too_many_refs_panic(gate_t *gate)
2434 {
2435 panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2436 }
2437
2438 static gate_wait_result_t
2439 gate_wait(gate_t* gate,
2440 wait_interrupt_t interruptible,
2441 uint64_t deadline,
2442 void (^primitive_unlock)(void),
2443 void (^primitive_lock)(void))
2444 {
2445 gate_wait_result_t ret;
2446 void_func_void func_after_interlock_unlock;
2447 wait_result_t wait_result;
2448 uintptr_t state;
2449 thread_t holder;
2450 bool waiters;
2451
2452 gate_verify(gate);
2453
2454 gate_ilock(gate);
2455 state = ordered_load_gate(gate);
2456 holder = GATE_STATE_TO_THREAD(state);
2457
2458 if (holder == NULL) {
2459 gate_not_closed_panic(gate, true);
2460 }
2461
2462 /*
2463 * Get a ref on the gate so it will not
2464 * be freed while we are coming back from the sleep.
2465 */
2466 if (gate->gt_refs == UINT16_MAX) {
2467 gate_too_many_refs_panic(gate);
2468 }
2469 gate->gt_refs++;
2470 state |= GATE_WAITERS;
2471 ordered_store_gate(gate, state);
2472
2473 /*
2474 * Release the primitive lock before any
2475 * turnstile operation. Turnstile
2476 * does not support a blocking primitive as
2477 * interlock.
2478 *
2479 * In this way, concurrent threads will be
2480 * able to acquire the primitive lock
2481 * but still will wait for me through the
2482 * gate interlock.
2483 */
2484 primitive_unlock();
2485
2486 func_after_interlock_unlock = gate_wait_turnstile( gate,
2487 interruptible,
2488 deadline,
2489 holder,
2490 &wait_result,
2491 &waiters);
2492
2493 state = ordered_load_gate(gate);
2494 holder = GATE_STATE_TO_THREAD(state);
2495
2496 switch (wait_result) {
2497 case THREAD_INTERRUPTED:
2498 case THREAD_TIMED_OUT:
2499 assert(holder != current_thread());
2500
2501 if (waiters) {
2502 state |= GATE_WAITERS;
2503 } else {
2504 state &= ~GATE_WAITERS;
2505 }
2506 ordered_store_gate(gate, state);
2507
2508 if (wait_result == THREAD_INTERRUPTED) {
2509 ret = GATE_INTERRUPTED;
2510 } else {
2511 ret = GATE_TIMED_OUT;
2512 }
2513 break;
2514 default:
2515 /*
2516 * Note it is possible that even if the gate was handed off to
2517 * me, someone called gate_steal() before I woke up.
2518 *
2519 * As well as it is possible that the gate was opened, but someone
2520 * closed it while I was waking up.
2521 *
2522 * In both cases we return GATE_OPENED, as the gate was opened to me
2523 * at one point; it is the caller's responsibility to check again whether
2524 * the gate is open.
2525 */
2526 if (holder == current_thread()) {
2527 ret = GATE_HANDOFF;
2528 } else {
2529 ret = GATE_OPENED;
2530 }
2531 break;
2532 }
2533
2534 assert(gate->gt_refs > 0);
2535 uint32_t ref = --gate->gt_refs;
2536 bool to_free = gate->gt_alloc;
2537 gate_iunlock(gate);
2538
2539 if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
2540 if (to_free == true) {
2541 assert(!waiters);
2542 if (ref == 0) {
2543 gate_free_internal(gate);
2544 }
2545 ret = GATE_OPENED;
2546 } else {
2547 gate_verify_destroy_panic(gate);
2548 }
2549 }
2550
2551 /*
2552 * turnstile func that needs to be executed without
2553 * holding the primitive interlock
2554 */
2555 func_after_interlock_unlock();
2556
2557 primitive_lock();
2558
2559 return ret;
2560 }
2561
2562 static void
2563 gate_assert(gate_t *gate, int flags)
2564 {
2565 uintptr_t state;
2566 thread_t holder;
2567
2568 gate_verify(gate);
2569
2570 gate_ilock(gate);
2571 state = ordered_load_gate(gate);
2572 holder = GATE_STATE_TO_THREAD(state);
2573
2574 switch (flags) {
2575 case GATE_ASSERT_CLOSED:
2576 assert(holder != NULL);
2577 break;
2578 case GATE_ASSERT_OPEN:
2579 assert(holder == NULL);
2580 break;
2581 case GATE_ASSERT_HELD:
2582 assert(holder == current_thread());
2583 break;
2584 default:
2585 panic("invalid %s flag %d", __func__, flags);
2586 }
2587
2588 gate_iunlock(gate);
2589 }
2590
2591 enum {
2592 GT_INIT_DEFAULT = 0,
2593 GT_INIT_ALLOC
2594 };
2595
2596 static void
2597 gate_init(gate_t *gate, uint type)
2598 {
2599 bzero(gate, sizeof(gate_t));
2600
2601 gate->gt_data = 0;
2602 gate->gt_turnstile = NULL;
2603 gate->gt_refs = 1;
2604 switch (type) {
2605 case GT_INIT_ALLOC:
2606 gate->gt_alloc = 1;
2607 break;
2608 default:
2609 gate->gt_alloc = 0;
2610 break;
2611 }
2612 gate->gt_type = GATE_TYPE;
2613 gate->gt_flags_pad = 0;
2614 }
2615
2616 static gate_t*
2617 gate_alloc_init(void)
2618 {
2619 gate_t *gate;
2620 gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2621 gate_init(gate, GT_INIT_ALLOC);
2622 return gate;
2623 }
2624
2625 __abortlike
2626 static void
2627 gate_destroy_owned_panic(gate_t *gate, thread_t holder)
2628 {
2629 panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
2630 }
2631
2632 __abortlike
2633 static void
2634 gate_destroy_waiter_panic(gate_t *gate)
2635 {
2636 panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
2637 }
2638
2639 static uint16_t
2640 gate_destroy_internal(gate_t *gate)
2641 {
2642 uintptr_t state;
2643 thread_t holder;
2644 uint16_t ref;
2645
2646 gate_ilock(gate);
2647 state = ordered_load_gate(gate);
2648 holder = GATE_STATE_TO_THREAD(state);
2649
2650 /*
2651 * The gate must be open
2652 * and all the threads must
2653 * have been woken up by this time
2654 */
2655 if (holder != NULL) {
2656 gate_destroy_owned_panic(gate, holder);
2657 }
2658 if (gate_has_waiter_bit(state)) {
2659 gate_destroy_waiter_panic(gate);
2660 }
2661
2662 assert(gate->gt_refs > 0);
2663
2664 ref = --gate->gt_refs;
2665
2666 /*
2667 * Mark the gate as destroyed.
2668 * The interlock bit still needs
2669 * to be available to let the
2670 * last woken-up threads clear
2671 * their wait.
2672 */
2673 state = GATE_DESTROYED;
2674 state |= GATE_ILOCK;
2675 ordered_store_gate(gate, state);
2676 gate_iunlock(gate);
2677 return ref;
2678 }
2679
2680 __abortlike
2681 static void
2682 gate_destroy_panic(gate_t *gate)
2683 {
2684 panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
2685 }
2686
2687 static void
2688 gate_destroy(gate_t *gate)
2689 {
2690 gate_verify(gate);
2691 if (gate->gt_alloc == 1) {
2692 gate_destroy_panic(gate);
2693 }
2694 gate_destroy_internal(gate);
2695 }
2696
2697 __abortlike
2698 static void
2699 gate_free_panic(gate_t *gate)
2700 {
2701 panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
2702 }
2703
2704 static void
2705 gate_free(gate_t *gate)
2706 {
2707 uint16_t ref;
2708
2709 gate_verify(gate);
2710
2711 if (gate->gt_alloc == 0) {
2712 gate_free_panic(gate);
2713 }
2714
2715 ref = gate_destroy_internal(gate);
2716 /*
2717 * Some of the threads waiting on the gate
2718 * might still need to run after being woken up.
2719 * They will access the gate to clean up their
2720 * state, so we cannot free it yet.
2721 * The last waiter will free the gate in this case.
2722 */
2723 if (ref == 0) {
2724 gate_free_internal(gate);
2725 }
2726 }
2727
2728 /*
2729 * Name: lck_rw_gate_init
2730 *
2731 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2732 *
2733 * Args:
2734 * Arg1: lck_rw_t lock used to protect the gate.
2735 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2736 */
2737 void
2738 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2739 {
2740 (void) lock;
2741 gate_init(gate, GT_INIT_DEFAULT);
2742 }
2743
2744 /*
2745 * Name: lck_rw_gate_alloc_init
2746 *
2747 * Description: allocates and initializes a gate_t.
2748 *
2749 * Args:
2750 * Arg1: lck_rw_t lock used to protect the gate.
2751 *
2752 * Returns:
2753 * gate_t allocated.
2754 */
2755 gate_t*
2756 lck_rw_gate_alloc_init(lck_rw_t *lock)
2757 {
2758 (void) lock;
2759 return gate_alloc_init();
2760 }
2761
2762 /*
2763 * Name: lck_rw_gate_destroy
2764 *
2765 * Description: destroys a variable previously initialized
2766 * with lck_rw_gate_init().
2767 *
2768 * Args:
2769 * Arg1: lck_rw_t lock used to protect the gate.
2770 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2771 */
2772 void
2773 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2774 {
2775 (void) lock;
2776 gate_destroy(gate);
2777 }
2778
2779 /*
2780 * Name: lck_rw_gate_free
2781 *
2782 * Description: destroys and tries to free a gate previously allocated
2783 * with lck_rw_gate_alloc_init().
2784 * The gate free might be delegated to the last thread returning
2785 * from the gate_wait().
2786 *
2787 * Args:
2788 * Arg1: lck_rw_t lock used to protect the gate.
2789 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2790 */
2791 void
2792 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2793 {
2794 (void) lock;
2795 gate_free(gate);
2796 }
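
/*
 * Example (illustrative sketch, not part of the original source): lifecycle
 * of a dynamically allocated gate. The free may be deferred to the last
 * waiter, so the caller must not touch the gate after lck_rw_gate_free()
 * returns. `my_rwlock` is a hypothetical lck_rw_t protecting the gate.
 *
 *	gate_t *g = lck_rw_gate_alloc_init(&my_rwlock);
 *	... use g with lck_rw_gate_close()/wait()/open() under my_rwlock ...
 *	lck_rw_gate_free(&my_rwlock, g);	// g may already be gone after this
 */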
2797
2798 /*
2799 * Name: lck_rw_gate_try_close
2800 *
2801 * Description: Tries to close the gate.
2802 * In case of success the current thread will be set as
2803 * the holder of the gate.
2804 *
2805 * Args:
2806 * Arg1: lck_rw_t lock used to protect the gate.
2807 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2808 *
2809 * Conditions: Lock must be held. Returns with the lock held.
2810 *
2811 * Returns:
2812 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2813 * of the gate.
2814 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2815 * to wake up possible waiters on the gate before returning to userspace.
2816 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2817 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2818 *
2819 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2820 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2821 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2822 * be done without dropping the lock that is protecting the gate in between.
2823 */
2824 int
2825 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2826 {
2827 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2828
2829 return gate_try_close(gate);
2830 }
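
/*
 * Example (illustrative sketch, not part of the original source): the
 * probe-then-wait protocol described above. The rw lock is held across
 * lck_rw_gate_try_close() and lck_rw_gate_wait(), so the gate cannot change
 * state in between. `my_rwlock` and `my_gate` are hypothetical caller data
 * (the gate declared with decl_lck_rw_gate_data and initialized with
 * lck_rw_gate_init()).
 *
 *	lck_rw_lock_exclusive(&my_rwlock);
 *	if (lck_rw_gate_try_close(&my_rwlock, &my_gate) != KERN_SUCCESS) {
 *		// Someone else holds the gate: wait for a handoff or an open.
 *		(void) lck_rw_gate_wait(&my_rwlock, &my_gate, LCK_SLEEP_DEFAULT,
 *		    THREAD_UNINT, 0);
 *	}
 *	...
 */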
2831
2832 /*
2833 * Name: lck_rw_gate_close
2834 *
2835 * Description: Closes the gate. The current thread will be set as
2836 * the holder of the gate. Will panic if the gate is already closed.
2837 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2838 * to wake up possible waiters on the gate before returning to userspace.
2839 *
2840 * Args:
2841 * Arg1: lck_rw_t lock used to protect the gate.
2842 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2843 *
2844 * Conditions: Lock must be held. Returns with the lock held.
2845 * The gate must be open.
2846 *
2847 */
2848 void
2849 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2850 {
2851 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2852
2853 return gate_close(gate);
2854 }
2855
2856 /*
2857 * Name: lck_rw_gate_open
2858 *
2859 * Description: Opens the gate and wakes up possible waiters.
2860 *
2861 * Args:
2862 * Arg1: lck_rw_t lock used to protect the gate.
2863 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2864 *
2865 * Conditions: Lock must be held. Returns with the lock held.
2866 * The current thread must be the holder of the gate.
2867 *
2868 */
2869 void
2870 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2871 {
2872 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2873
2874 gate_open(gate);
2875 }
2876
2877 /*
2878 * Name: lck_rw_gate_handoff
2879 *
2880 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2881 * priority will be selected as the new holder of the gate, and woken up,
2882 * with the gate remaining in the closed state throughout.
2883 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2884 * will be returned.
2885 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2886 * case no waiters were found.
2887 *
2888 *
2889 * Args:
2890 * Arg1: lck_rw_t lock used to protect the gate.
2891 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2892 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2893 *
2894 * Conditions: Lock must be held. Returns with the lock held.
2895 * The current thread must be the holder of the gate.
2896 *
2897 * Returns:
2898 * KERN_SUCCESS in case one of the waiters became the new holder.
2899 * KERN_NOT_WAITING in case there were no waiters.
2900 *
2901 */
2902 kern_return_t
2903 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2904 {
2905 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2906
2907 return gate_handoff(gate, flags);
2908 }
2909
2910 /*
2911 * Name: lck_rw_gate_steal
2912 *
2913 * Description: Steals the ownership of the gate. It sets the current thread as the
2914 * new holder of the gate.
2915 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2916 * to wake up possible waiters on the gate before returning to userspace.
2917 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2918 * anymore.
2919 *
2920 *
2921 * Args:
2922 * Arg1: lck_rw_t lock used to protect the gate.
2923 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2924 *
2925 * Conditions: Lock must be held. Returns with the lock held.
2926 * The gate must be closed and the current thread must not already be the holder.
2927 *
2928 */
2929 void
2930 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2931 {
2932 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2933
2934 gate_steal(gate);
2935 }
2936
2937 /*
2938 * Name: lck_rw_gate_wait
2939 *
2940 * Description: Waits for the current thread to become the holder of the gate or for the
2941 * gate to become open. An interruptible mode and deadline can be specified
2942 * to return earlier from the wait.
2943 *
2944 * Args:
2945 * Arg1: lck_rw_t lock used to protect the gate.
2946 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2947 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
2948 * Arg4: interruptible flag for wait.
2949 * Arg5: deadline for wait.
2950 *
2951 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2952 * Lock will be dropped while waiting.
2953 * The gate must be closed.
2954 *
2955 * Returns: Reason why the thread was woken up.
2956 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2957 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2958 * to wake up possible waiters on the gate before returning to userspace.
2959 * GATE_OPENED - the gate was opened by the holder.
2960 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2961 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2962 */
2963 gate_wait_result_t
2964 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2965 {
2966 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2967
2968 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2969
2970 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2971 return gate_wait(gate,
2972 interruptible,
2973 deadline,
2974 ^{lck_rw_type = lck_rw_done(lock);},
2975 ^{;});
2976 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2977 return gate_wait(gate,
2978 interruptible,
2979 deadline,
2980 ^{lck_rw_type = lck_rw_done(lock);},
2981 ^{lck_rw_lock(lock, lck_rw_type);});
2982 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2983 return gate_wait(gate,
2984 interruptible,
2985 deadline,
2986 ^{lck_rw_type = lck_rw_done(lock);},
2987 ^{lck_rw_lock_exclusive(lock);});
2988 } else {
2989 return gate_wait(gate,
2990 interruptible,
2991 deadline,
2992 ^{lck_rw_type = lck_rw_done(lock);},
2993 ^{lck_rw_lock_shared(lock);});
2994 }
2995 }
2996
2997 /*
2998 * Name: lck_rw_gate_assert
2999 *
3000 * Description: asserts that the gate is in the specified state.
3001 *
3002 * Args:
3003 * Arg1: lck_rw_t lock used to protect the gate.
3004 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3005 * Arg3: flags to specified assert type.
3006 * GATE_ASSERT_CLOSED - the gate is currently closed
3007 * GATE_ASSERT_OPEN - the gate is currently opened
3008 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3009 */
3010 void
3011 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
3012 {
3013 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3014
3015 gate_assert(gate, flags);
3016 return;
3017 }
3018
3019 /*
3020 * Name: lck_mtx_gate_init
3021 *
3022 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
3023 *
3024 * Args:
3025 * Arg1: lck_mtx_t lock used to protect the gate.
3026 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3027 */
3028 void
3029 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
3030 {
3031 (void) lock;
3032 gate_init(gate, GT_INIT_DEFAULT);
3033 }
3034
3035 /*
3036 * Name: lck_mtx_gate_alloc_init
3037 *
3038 * Description: allocates and initializes a gate_t.
3039 *
3040 * Args:
3041 * Arg1: lck_mtx_t lock used to protect the gate.
3042 *
3043 * Returns:
3044 * gate_t allocated.
3045 */
3046 gate_t*
3047 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3048 {
3049 (void) lock;
3050 return gate_alloc_init();
3051 }
3052
3053 /*
3054 * Name: lck_mtx_gate_destroy
3055 *
3056 * Description: destroys a variable previously initialized
3057 * with lck_mtx_gate_init().
3058 *
3059 * Args:
3060 * Arg1: lck_mtx_t lock used to protect the gate.
3061 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3062 */
3063 void
3064 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3065 {
3066 (void) lock;
3067 gate_destroy(gate);
3068 }
3069
3070 /*
3071 * Name: lck_mtx_gate_free
3072 *
3073 * Description: destroys and tries to free a gate previously allocated
3074 * with lck_mtx_gate_alloc_init().
3075 * The gate free might be delegated to the last thread returning
3076 * from the gate_wait().
3077 *
3078 * Args:
3079 * Arg1: lck_mtx_t lock used to protect the gate.
3080 * Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3081 */
3082 void
3083 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3084 {
3085 (void) lock;
3086 gate_free(gate);
3087 }
3088
3089 /*
3090 * Name: lck_mtx_gate_try_close
3091 *
3092 * Description: Tries to close the gate.
3093 * In case of success the current thread will be set as
3094 * the holder of the gate.
3095 *
3096 * Args:
3097 * Arg1: lck_mtx_t lock used to protect the gate.
3098 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3099 *
3100 * Conditions: Lock must be held. Returns with the lock held.
3101 *
3102 * Returns:
3103 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3104 * of the gate.
3105 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3106 * to wake up possible waiters on the gate before returning to userspace.
3107 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3108 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3109 *
3110 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3111 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3112 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3113 * be done without dropping the lock that is protecting the gate in between.
3114 */
3115 int
3116 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3117 {
3118 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3119
3120 return gate_try_close(gate);
3121 }
3122
3123 /*
3124 * Name: lck_mtx_gate_close
3125 *
3126 * Description: Closes the gate. The current thread will be set as
3127 * the holder of the gate. Will panic if the gate is already closed.
3128 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3129 * to wake up possible waiters on the gate before returning to userspace.
3130 *
3131 * Args:
3132 * Arg1: lck_mtx_t lock used to protect the gate.
3133 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3134 *
3135 * Conditions: Lock must be held. Returns with the lock held.
3136 * The gate must be open.
3137 *
3138 */
3139 void
3140 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3141 {
3142 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3143
3144 return gate_close(gate);
3145 }
3146
3147 /*
3148 * Name: lck_mtx_gate_open
3149 *
3150 * Description: Opens the gate and wakes up possible waiters.
3151 *
3152 * Args:
3153 * Arg1: lck_mtx_t lock used to protect the gate.
3154 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3155 *
3156 * Conditions: Lock must be held. Returns with the lock held.
3157 * The current thread must be the holder of the gate.
3158 *
3159 */
3160 void
3161 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3162 {
3163 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3164
3165 gate_open(gate);
3166 }
3167
3168 /*
3169 * Name: lck_mtx_gate_handoff
3170 *
3171 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3172 * priority will be selected as the new holder of the gate, and woken up,
3173 * with the gate remaining in the closed state throughout.
3174 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3175 * will be returned.
3176 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3177 * case no waiters were found.
3178 *
3179 *
3180 * Args:
3181 * Arg1: lck_mtx_t lock used to protect the gate.
3182 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3183 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3184 *
3185 * Conditions: Lock must be held. Returns with the lock held.
3186 * The current thread must be the holder of the gate.
3187 *
3188 * Returns:
3189 * KERN_SUCCESS in case one of the waiters became the new holder.
3190 * KERN_NOT_WAITING in case there were no waiters.
3191 *
3192 */
3193 kern_return_t
3194 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3195 {
3196 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3197
3198 return gate_handoff(gate, flags);
3199 }
3200
3201 /*
3202 * Name: lck_mtx_gate_steal
3203 *
3204 * Description: Steals the ownership of the gate. It sets the current thread as the
3205 * new holder of the gate.
3206 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3207 * to wake up possible waiters on the gate before returning to userspace.
3208 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3209 * anymore.
3210 *
3211 *
3212 * Args:
3213 * Arg1: lck_mtx_t lock used to protect the gate.
3214 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3215 *
3216 * Conditions: Lock must be held. Returns with the lock held.
3217 * The gate must be closed and the current thread must not already be the holder.
3218 *
3219 */
3220 void
3221 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3222 {
3223 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3224
3225 gate_steal(gate);
3226 }
3227
3228 /*
3229 * Name: lck_mtx_gate_wait
3230 *
3231 * Description: Waits for the current thread to become the holder of the gate or for the
3232 * gate to become open. An interruptible mode and deadline can be specified
3233 * to return earlier from the wait.
3234 *
3235 * Args:
3236 * Arg1: lck_mtx_t lock used to protect the gate.
3237 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3238 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3239 * Arg4: interruptible flag for wait.
3240 * Arg5: deadline for wait.
3241 *
3242 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3243 * Lock will be dropped while waiting.
3244 * The gate must be closed.
3245 *
3246 * Returns: Reason why the thread was woken up.
3247 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3248 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3249 * to wake up possible waiters on the gate before returning to userspace.
3250 * GATE_OPENED - the gate was opened by the holder.
3251 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3252 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3253 */
3254 gate_wait_result_t
3255 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3256 {
3257 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3258
3259 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3260 return gate_wait(gate,
3261 interruptible,
3262 deadline,
3263 ^{lck_mtx_unlock(lock);},
3264 ^{;});
3265 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3266 return gate_wait(gate,
3267 interruptible,
3268 deadline,
3269 ^{lck_mtx_unlock(lock);},
3270 ^{lck_mtx_lock_spin(lock);});
3271 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3272 return gate_wait(gate,
3273 interruptible,
3274 deadline,
3275 ^{lck_mtx_unlock(lock);},
3276 ^{lck_mtx_lock_spin_always(lock);});
3277 } else {
3278 return gate_wait(gate,
3279 interruptible,
3280 deadline,
3281 ^{lck_mtx_unlock(lock);},
3282 ^{lck_mtx_lock(lock);});
3283 }
3284 }
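
/*
 * Example (illustrative sketch, not part of the original source): interpreting
 * the result of a gate wait under a mutex. `my_mtx` and `my_gate` are
 * hypothetical caller data; the mutex is held across the probe and the wait
 * as required by the protocol described above.
 *
 *	lck_mtx_lock(&my_mtx);
 *	while (lck_mtx_gate_try_close(&my_mtx, &my_gate) != KERN_SUCCESS) {
 *		gate_wait_result_t res;
 *
 *		res = lck_mtx_gate_wait(&my_mtx, &my_gate, LCK_SLEEP_DEFAULT,
 *		    THREAD_UNINT, 0);
 *		if (res == GATE_HANDOFF) {
 *			break;	// we are the holder now
 *		}
 *		// GATE_OPENED: the gate may have been re-closed; probe again.
 *	}
 *	... do work as the holder, then lck_mtx_gate_open(&my_mtx, &my_gate) ...
 *	lck_mtx_unlock(&my_mtx);
 */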
3285
3286 /*
3287 * Name: lck_mtx_gate_assert
3288 *
3289 * Description: asserts that the gate is in the specified state.
3290 *
3291 * Args:
3292 * Arg1: lck_mtx_t lock used to protect the gate.
3293 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3294 * Arg3: flags to specified assert type.
3295 * GATE_ASSERT_CLOSED - the gate is currently closed
3296 * GATE_ASSERT_OPEN - the gate is currently opened
3297 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3298 */
3299 void
3300 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3301 {
3302 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3303
3304 gate_assert(gate, flags);
3305 }
3306
3307 #pragma mark - LCK_*_DECLARE support
3308
3309 __startup_func
3310 void
3311 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3312 {
3313 lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3314 }
3315
3316 __startup_func
3317 void
3318 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3319 {
3320 if (sp->lck_ext) {
3321 lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
3322 } else {
3323 lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3324 }
3325 }
3326
3327 __startup_func
3328 void
3329 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3330 {
3331 lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3332 }
3333
3334 __startup_func
3335 void
3336 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3337 {
3338 simple_lock_init(sp->lck, sp->lck_init_arg);
3339 }
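
/*
 * Example (illustrative sketch, assuming the LCK_*_DECLARE macros from the
 * matching locks header, not part of the original source): these startup
 * hooks back the static lock declaration macros, which register a lock for
 * initialization at boot, along the lines of:
 *
 *	LCK_GRP_DECLARE(my_lck_grp, "my-subsystem");
 *	LCK_MTX_DECLARE(my_mtx, &my_lck_grp);
 *
 * At startup, lck_mtx_startup_init() runs for my_mtx with the recorded group
 * and attributes, so the lock is usable without an explicit lck_mtx_init().
 */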
3340