1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82
83 #include <sys/kdebug.h>
84
85 #define LCK_MTX_SLEEP_CODE 0
86 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
87 #define LCK_MTX_LCK_WAIT_CODE 2
88 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
89
// Panic in checks that validate lock usage correctness.
// These panics are undesirable when already in a panic or when a debugger is running.
92 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
93
94 #if MACH_LDEBUG
95 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
96 #else
97 #define ALIGN_TEST(p, t) do{}while(0)
98 #endif
99
100 #define NOINLINE __attribute__((noinline))
101
102 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
103 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
104
105 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
106
107 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
108 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
109
110 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
111
112 struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);
113
114 #if CONFIG_PV_TICKET
115 SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
116 #endif
117
118 #if DEBUG
119 TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
120 #else
121 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
122 #endif
123
124 #if CONFIG_DTRACE
125 #if defined (__x86_64__)
126 machine_timeout_t dtrace_spin_threshold = 500; // 500ns
127 #elif defined(__arm64__)
128 MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
129 0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
130 #endif
131 #endif
132
133 __kdebug_only
134 uintptr_t
unslide_for_kdebug(void * object)135 unslide_for_kdebug(void* object)
136 {
137 if (__improbable(kdebug_enable)) {
138 return VM_KERNEL_UNSLIDE_OR_PERM(object);
139 } else {
140 return 0;
141 }
142 }
143
/*
 * Out-of-line, never-returning panic helper for the preemption check
 * below; kept separate so the inline fast path stays small.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
149
150 static inline void
__lck_require_preemption_disabled(void * lock,thread_t self __unused)151 __lck_require_preemption_disabled(void *lock, thread_t self __unused)
152 {
153 if (__improbable(!lock_preemption_disabled_for_thread(self))) {
154 __lck_require_preemption_disabled_panic(lock);
155 }
156 }
157
158 #pragma mark - HW Spin policies
159
/*
 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
 *
 * Snapshot the policy's timeout (and, under SCHED_HYGIENE_DEBUG, the
 * PPL/interruptibility context) once, before the caller starts spinning.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	/* policies may scale their base timeout by a power of two */
	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	/*
	 * When spinning with interrupts masked and the interrupt-masked
	 * watchdog is in panic mode, cap the spin timeout at the watchdog's
	 * threshold (converted to TSC units on Intel).
	 */
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
198
/*
 * Whether the spinning context is in the PPL: read from the snapshot
 * taken by hw_spin_compute_timeout() when available, otherwise query
 * the pmap layer directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
210
/*
 * Called periodically by spinning lock paths.  Tracks elapsed spin time
 * in @state; once the policy timeout elapses, invokes the policy's
 * timeout handler, which either panics or decides whether to continue.
 *
 * Returns true if the caller should keep spinning.
 */
bool
hw_spin_should_keep_spinning(
	void *lock,
	hw_spin_policy_t pol,
	hw_spin_timeout_t to,
	hw_spin_state_t *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	/* a zero timeout means "spin forever" */
	if (__improbable(to.hwst_timeout == 0)) {
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		/* sample cumulative interrupt time; stored into state for the handler */
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	/* first call (deadline unset): arm the deadline instead of firing */
	if (__probable(state->hwss_deadline == 0)) {
		state->hwss_start = now;
		state->hwss_deadline = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	/* hwsp_lock_offset lets a policy hand its handler the enclosing object */
	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
265
266 __attribute__((always_inline))
267 void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)268 lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
269 {
270 #if DEBUG || DEVELOPMENT
271 PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
272 #else
273 (void)owner;
274 #endif
275 }
276
277 __attribute__((always_inline))
278 void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)279 lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
280 {
281 #if DEBUG || DEVELOPMENT
282 PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
283 (uintptr_t)ctid_get_thread_unsafe(ctid);
284 #else
285 (void)ctid;
286 #endif
287 }
288
/*
 * Fill this CPU's lck_spinlock_to_info record (lock address, resolved
 * owner thread, owner CPU) right before a spinlock-timeout panic, so
 * the information is available to the panic log / debugger.
 *
 * @owner is either a compact thread ID (small value) or a thread
 * pointer possibly carrying low lock-state bits.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		/* small values are compact thread IDs: resolve to a thread pointer */
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u; /* ~0u == "owner cpu unknown" */
	/* release: publish the filled-in record before the in-progress pointer */
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	/* scan all CPUs to find which one is currently running the owner */
	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
329
330 #pragma mark - HW locks
331
/*
 *	Routine: hw_lock_init
 *
 *	Initialize a hardware lock.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	/* 0 == unlocked; non-zero encodes owner thread plus state bits */
	ordered_store_hw(lock, 0);
}
342
/*
 * Single acquisition attempt: CAS the lock word from 0 (unlocked) to
 * @newval with acquire ordering, returning false on contention without
 * spinning.  On LL/SC architectures a failed attempt parks the core in
 * wait_for_event() so the caller's retry loop resumes when the lock
 * word is written.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
			wait_for_event(); // clears the monitor so we don't need give_up()
			return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	/* arm the exclusive monitor so a failed check can wait on it */
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
367
/*
 * Single attempt to set bit @bit in @target with acquire ordering.
 * Returns true iff this call transitioned the bit from clear to set
 * (i.e. the bit lock was acquired).
 *
 * @wait selects the contention behavior: pause/wait for a monitor
 * event before returning false, versus bailing out immediately.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
			/* bit already held */
#if OS_ATOMIC_HAS_LLSC
			if (wait) {
				wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
				os_atomic_clear_exclusive();
			}
#else
			if (wait) {
				cpu_pause();
			}
#endif
			return false;
		}
	});
	return true;
#else
	/* arm the exclusive monitor so a failed check can wait on it */
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* set the bit; acquired only if it was previously clear */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
407
/*
 * Default timeout handler for hw_lock_t: panic with ownership
 * diagnostics, unless spinlock_timeout_panic was cleared (debug aid),
 * in which case keep spinning.
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul; /* strip lock state bits */
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
446
/* Default hw_lock_t spin policy: panic after lock_panic_timeout. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name = "hw_lock_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_spin_timeout_panic,
};
452
453 static hw_spin_timeout_status_t
hw_spin_always_return(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)454 hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
455 {
456 #pragma unused(_lock, to, st)
457 return HW_LOCK_TIMEOUT_RETURN;
458 }
459
/*
 * Policy used while the system is already panicking: spin for
 * 4x LockTimeOut (LockTimeOutTSC on Intel; shift 2 == x4) and then
 * give up instead of panicking again.
 */
const struct hw_spin_policy hw_lock_spin_panic_policy = {
	.hwsp_name = "hw_lock_t[panic]",
#if defined(__x86_64__)
	.hwsp_timeout = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic = &LockTimeOut,
#endif
	.hwsp_timeout_shift = 2,
	.hwsp_op_timeout = hw_spin_always_return,
};
470
#if DEBUG || DEVELOPMENT
/* 100us timeout value computed at boot by hw_lock_test_to_init(). */
static machine_timeout_t hw_lock_test_to;
/*
 * Test-only policy: give up (return) on timeout instead of panicking.
 * NOTE(review): hw_lock_test_to is initialized below but never
 * referenced; this policy likely should use &hw_lock_test_to rather
 * than the LockTimeOut* globals — confirm against callers before changing.
 */
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name = "testing policy",
#if defined(__x86_64__)
	.hwsp_timeout = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic = &LockTimeOut,
#endif
	.hwsp_timeout_shift = 2,
	.hwsp_op_timeout = hw_spin_always_return,
};

/*
 * Compute the 100us test timeout in the unit hw_spin expects
 * (absolutetime on arm, TSC ticks on Intel).
 */
__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
498
/*
 * Timeout handler for bit locks: panic showing the raw 32-bit lock
 * word, unless spinlock_timeout_panic was cleared.
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

/* Default policy for bit locks: panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};
520
#if __arm64__
/* 0x3000000 timebase ticks — roughly 2s assuming a 24MHz timebase. */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};
#endif
529
/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t lock,
	uintptr_t data,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

	/* catch self-deadlock: the lock word already names this thread */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	/* batches of LOCK_SNOOP_SPINS cheap attempts between timeout checks */
	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
591
/*
 * Timeout handler for hw_wait_while_equals32(): panic showing the
 * watched address and its current value (unless panics are disabled).
 */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name = "hw_wait_while_equals32",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals32_panic,
};

/* 64-bit variant of the timeout handler above. */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name = "hw_wait_while_equals64",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals64_panic,
};
635
/*
 * Spin until *address differs from @current, consulting the policy
 * (which panics by default) if the spin lasts too long.
 * Returns the first observed differing value.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint32_t v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}

/* 64-bit variant of hw_wait_while_equals32(). */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint64_t v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
665
/*
 * Common acquire path: one fast trylock attempt, then the contended
 * spin loop under the given policy.  Preemption must already be
 * disabled by the caller.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t lock,
	thread_t thread,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}
683
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
/* name parenthesized to bypass any same-named function-like macro */
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* preemption stays disabled until the matching hw_lock_unlock() */
	lock_disable_preemption_for_thread(thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
698
/*
 *	Routine: hw_lock_lock_nopreempt
 *
 *	Acquire lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (panics otherwise).
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
712
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.  Returns the hw_lock_status_t as unsigned.
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
728
/*
 *	Routine: hw_lock_to_nopreempt
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, called and return with
 *	preemption disabled (panics if preemption is enabled on entry).
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
744
745 __result_use_check
746 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))747 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
748 {
749 if (__probable(lock_cmpxchg(&lock->lock_data, 0,
750 HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
751 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
752 return true;
753 }
754 return false;
755 }
756
/*
 *	Routine: hw_lock_try
 *
 *	returns with preemption disabled on success.
 */
unsigned
int
(hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
	if (!success) {
		/* failed: undo the speculative preemption disable */
		lock_enable_preemption();
	}
	return success;
}
774
/*
 * Try-acquire for callers that already hold preemption disabled
 * (panics if preemption is enabled on entry).
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
783
#if DEBUG || DEVELOPMENT
/*
 * Called from hw_lock_unlock_internal() when the lock word does not
 * name the current thread as owner.  The message previously said
 * "trying to lock %p recursively" — copy-pasted from the recursive
 * acquisition panic — which misdescribed the failure; it is an unlock
 * of a lock this thread does not own.
 */
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock lock %p not owned by it",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */
793
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 *	(The internal helper below releases the lock only; the wrappers
 *	decide whether to re-enable preemption.)
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	/* ownership check: only the owning thread may unlock */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	/* release: publishes all writes made under the lock */
	os_atomic_store(&lock->lock_data, 0, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
814
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	/* drop the preemption disable taken at lock time */
	lock_enable_preemption();
}

/* Variant for callers that manage preemption themselves. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
827
828 /*
829 * Routine hw_lock_held, doesn't change preemption state.
830 * N.B. Racy, of course.
831 */
832 unsigned int
hw_lock_held(hw_lock_t lock)833 hw_lock_held(hw_lock_t lock)
834 {
835 return ordered_load_hw(lock) != 0;
836 }
837
/*
 * Contended slow path for bit locks: spin in batches of
 * LOCK_SNOOP_SPINS attempts, consulting the spin policy between
 * batches.  Called with preemption disabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t *lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/*
			 * bool result assigned to hw_lock_status_t: relies on
			 * HW_LOCK_CONTENDED == 0 and HW_LOCK_ACQUIRED == 1
			 * (the assert below depends on the same mapping).
			 */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
881
/*
 * Common bit-lock acquire: fast trylock attempt, then the contended
 * spin loop.  Preemption must already be disabled.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t *lock,
	unsigned int bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
897
/*
 *	Routine: hw_lock_bit_to
 *
 *	Acquire bit lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t * lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}
916
/*
 *	Routine: hw_lock_bit
 *
 *	Acquire bit lock, spinning until it becomes available,
 *	return with preemption disabled.  Uses the default
 *	panic-on-timeout policy.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
929
/*
 *	Routine: hw_lock_bit_nopreempt
 *
 *	Acquire bit lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (panics otherwise).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
941
942
943 unsigned
944 int
945 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
946 {
947 boolean_t success = false;
948
949 _disable_preemption();
950 success = hw_lock_trylock_bit(lock, bit, false);
951 if (!success) {
952 lock_enable_preemption();
953 }
954
955 if (success) {
956 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
957 }
958
959 return success;
960 }
961
/*
 * Clear bit @bit with release ordering, publishing all writes made
 * while the bit lock was held.
 */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
970
/*
 *	Routine: hw_unlock_bit
 *
 *	Release spin-lock. The second parameter is the bit number to clear.
 *	Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}

/* Variant for callers that manage preemption themselves. */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	/* caller must already hold preemption disabled (panics otherwise) */
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
990
991
992 #pragma mark - lck_*_sleep
993
/*
 * Routine: lck_spin_sleep
 *
 *	Atomically drop @lck, block on @event, and (unless
 *	LCK_SLEEP_UNLOCK is set) reacquire @lck before returning.
 *	Returns the wait result from the blocking primitive.
 */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible,
	lck_grp_t *grp)
{
	wait_result_t res;

	/* reject undefined action bits */
	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	/* register the wait while still holding the spinlock */
	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* did not block, but the caller asked us to drop the lock */
		lck_spin_unlock(lck);
	}

	return res;
}
1024
/* lck_spin_sleep: lck_spin_sleep_grp() without lock-group attribution. */
wait_result_t
lck_spin_sleep(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1034
/*
 * Routine: lck_spin_sleep_deadline
 *
 *	Like lck_spin_sleep(), but the wait is bounded by @deadline
 *	(mach absolute time).
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	wait_result_t res;

	/* reject undefined action bits */
	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* NOTE(review): reacquires without lock-group attribution,
			 * unlike lck_spin_sleep_grp() — confirm intentional */
			lck_spin_lock(lck);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}
1065
1066 /*
1067 * Routine: lck_mtx_sleep
1068 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	thread_pri_floor_t      token;  /* only initialized/used when LCK_SLEEP_PROMOTED_PRI is set */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		/* Committed to waiting: drop the mutex, block, then reacquire per flags. */
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Wait was not asserted; still honor the caller's unlock request. */
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1121
1122
1123 /*
1124 * Routine: lck_mtx_sleep_deadline
1125 */
1126 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1127 lck_mtx_sleep_deadline(
1128 lck_mtx_t *lck,
1129 lck_sleep_action_t lck_sleep_action,
1130 event_t event,
1131 wait_interrupt_t interruptible,
1132 uint64_t deadline)
1133 {
1134 wait_result_t res;
1135 thread_pri_floor_t token;
1136
1137 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1138 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1139
1140 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1141 panic("Invalid lock sleep action %x", lck_sleep_action);
1142 }
1143
1144 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1145 /*
1146 * See lck_mtx_sleep().
1147 */
1148 token = thread_priority_floor_start();
1149 }
1150
1151 res = assert_wait_deadline(event, interruptible, deadline);
1152 if (res == THREAD_WAITING) {
1153 lck_mtx_unlock(lck);
1154 res = thread_block(THREAD_CONTINUE_NULL);
1155 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1156 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1157 lck_mtx_lock_spin(lck);
1158 } else {
1159 lck_mtx_lock(lck);
1160 }
1161 }
1162 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1163 lck_mtx_unlock(lck);
1164 }
1165
1166 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1167 thread_priority_floor_end(&token);
1168 }
1169
1170 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1171
1172 return res;
1173 }
1174
1175 /*
1176 * sleep_with_inheritor and wakeup_with_inheritor KPI
1177 *
1178 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1179 * the latest thread specified as inheritor.
1180 *
 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions, specifying upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore it is the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1184 *
1185 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1186 *
 * Turnstile requires a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation; therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1191 *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without disabling interrupts.
1195 * Also:
1196 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1197 * - every event will have a dedicated wait_queue.
1198 *
1199 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1200 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1201 */
1202
/*
 * Routine: wakeup_with_inheritor_and_turnstile
 *
 * Backend for wakeup_one_with_inheritor()/wakeup_all_with_inheritor().
 * Wakes either the highest-priority waiter on `event` (wake_one) or all
 * waiters, updating the turnstile inheritor accordingly.
 *
 * Returns KERN_SUCCESS if at least one thread was woken, KERN_NOT_WAITING
 * otherwise.  When wake_one is true and `thread_wokenup` is non-NULL, a
 * reference to the woken thread is returned through it (the caller owns
 * the reference).
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t event,
	wait_result_t result,
	bool wake_one,
	lck_wake_action_t action,
	thread_t *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;
	thread_t wokeup;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (wake_one) {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(event), result, flags);
		if (wokeup != NULL) {
			if (thread_wokenup != NULL) {
				*thread_wokenup = wokeup;
			} else {
				/* Caller does not want the reference: drop it. */
				thread_deallocate_safe(wokeup);
			}
			ret = KERN_SUCCESS;
			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
				/* Inheritor untouched: no chain update is needed. */
				goto complete;
			}
		} else {
			if (thread_wokenup != NULL) {
				*thread_wokenup = NULL;
			}
			/* Nobody was waiting: the old inheritor loses the push. */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			ret = KERN_NOT_WAITING;
		}
	} else {
		/* Broadcast wakeup: wake everybody and clear the inheritor. */
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor or is null, or is a thread that is just been woken up
	 * and have not blocked because it is racing with the same interlock used here
	 * after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However unless the current thread is the old inheritor,
	 * old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We can anyway optimize this by asking turnstile to tell us
	 * if old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1293
/*
 * Routine: sleep_with_inheritor_and_turnstile
 *
 * Common backend for the lck_*_sleep_with_inheritor() wrappers.
 * `primitive_unlock` drops the caller's lock once the hash-bucket
 * spinlock (used as the turnstile interlock) is held, so no wakeup can
 * be missed; `primitive_lock` reacquires the caller's lock (or is a
 * no-op for LCK_SLEEP_UNLOCK) before returning.
 *
 * Returns the wait result of the block (or of the failed assert_wait).
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline,
	void (^primitive_lock)(void),
	void (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Retake the bucket lock to tear down our use of the hashed turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* Reacquire the caller's primitive (no-op for LCK_SLEEP_UNLOCK). */
	primitive_lock();

	return ret;
}
1350
1351 /*
1352 * change_sleep_inheritor is independent from the locking primitive.
1353 */
1354
1355 /*
1356 * Name: change_sleep_inheritor
1357 *
1358 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1359 *
1360 * Args:
1361 * Arg1: event to redirect the push.
1362 * Arg2: new inheritor for event.
1363 *
1364 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1365 *
1366 * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1367 * wakeup for the event is called.
1368 * NOTE: this cannot be called from interrupt context.
1369 */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	/* Report KERN_NOT_WAITING, but still redirect the (empty) push below. */
	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	/* Retake the bucket lock to tear down our use of the hashed turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1411
1412 /*
1413 * Name: lck_spin_sleep_with_inheritor
1414 *
1415 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1416 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1417 * be directed to the inheritor specified.
1418 * An interruptible mode and deadline can be specified to return earlier from the wait.
1419 *
1420 * Args:
 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1422 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1423 * Arg3: event to wait on.
1424 * Arg4: thread to propagate the event push to.
1425 * Arg5: interruptible flag for wait.
1426 * Arg6: deadline for wait.
1427 *
1428 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1429 * Lock will be dropped while waiting.
1430 * The inheritor specified cannot return to user space or exit until another inheritor is specified for the event or a
1431 * wakeup for the event is called.
1432 *
1433 * Returns: result of the wait.
1434 */
1435 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1436 lck_spin_sleep_with_inheritor(
1437 lck_spin_t *lock,
1438 lck_sleep_action_t lck_sleep_action,
1439 event_t event,
1440 thread_t inheritor,
1441 wait_interrupt_t interruptible,
1442 uint64_t deadline)
1443 {
1444 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1445 return sleep_with_inheritor_and_turnstile(event, inheritor,
1446 interruptible, deadline,
1447 ^{}, ^{ lck_spin_unlock(lock); });
1448 } else {
1449 return sleep_with_inheritor_and_turnstile(event, inheritor,
1450 interruptible, deadline,
1451 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1452 }
1453 }
1454
1455 /*
1456 * Name: lck_ticket_sleep_with_inheritor
1457 *
1458 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1459 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1460 * be directed to the inheritor specified.
1461 * An interruptible mode and deadline can be specified to return earlier from the wait.
1462 *
1463 * Args:
 * Arg1: lck_ticket_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1465 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1466 * Arg3: event to wait on.
1467 * Arg4: thread to propagate the event push to.
1468 * Arg5: interruptible flag for wait.
1469 * Arg6: deadline for wait.
1470 *
1471 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1472 * Lock will be dropped while waiting.
1473 * The inheritor specified cannot return to user space or exit until another inheritor is specified for the event or a
1474 * wakeup for the event is called.
1475 *
1476 * Returns: result of the wait.
1477 */
1478 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1479 lck_ticket_sleep_with_inheritor(
1480 lck_ticket_t *lock,
1481 lck_grp_t *grp,
1482 lck_sleep_action_t lck_sleep_action,
1483 event_t event,
1484 thread_t inheritor,
1485 wait_interrupt_t interruptible,
1486 uint64_t deadline)
1487 {
1488 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1489 return sleep_with_inheritor_and_turnstile(event, inheritor,
1490 interruptible, deadline,
1491 ^{}, ^{ lck_ticket_unlock(lock); });
1492 } else {
1493 return sleep_with_inheritor_and_turnstile(event, inheritor,
1494 interruptible, deadline,
1495 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1496 }
1497 }
1498
1499 /*
1500 * Name: lck_mtx_sleep_with_inheritor
1501 *
1502 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1503 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1504 * be directed to the inheritor specified.
1505 * An interruptible mode and deadline can be specified to return earlier from the wait.
1506 *
1507 * Args:
 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1509 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
1510 * Arg3: event to wait on.
1511 * Arg4: thread to propagate the event push to.
1512 * Arg5: interruptible flag for wait.
1513 * Arg6: deadline for wait.
1514 *
1515 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1516 * Lock will be dropped while waiting.
1517 * The inheritor specified cannot return to user space or exit until another inheritor is specified for the event or a
1518 * wakeup for the event is called.
1519 *
1520 * Returns: result of the wait.
1521 */
1522 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1523 lck_mtx_sleep_with_inheritor(
1524 lck_mtx_t *lock,
1525 lck_sleep_action_t lck_sleep_action,
1526 event_t event,
1527 thread_t inheritor,
1528 wait_interrupt_t interruptible,
1529 uint64_t deadline)
1530 {
1531 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1532
1533 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1534 return sleep_with_inheritor_and_turnstile(event,
1535 inheritor,
1536 interruptible,
1537 deadline,
1538 ^{;},
1539 ^{lck_mtx_unlock(lock);});
1540 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1541 return sleep_with_inheritor_and_turnstile(event,
1542 inheritor,
1543 interruptible,
1544 deadline,
1545 ^{lck_mtx_lock_spin(lock);},
1546 ^{lck_mtx_unlock(lock);});
1547 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1548 return sleep_with_inheritor_and_turnstile(event,
1549 inheritor,
1550 interruptible,
1551 deadline,
1552 ^{lck_mtx_lock_spin_always(lock);},
1553 ^{lck_mtx_unlock(lock);});
1554 } else {
1555 return sleep_with_inheritor_and_turnstile(event,
1556 inheritor,
1557 interruptible,
1558 deadline,
1559 ^{lck_mtx_lock(lock);},
1560 ^{lck_mtx_unlock(lock);});
1561 }
1562 }
1563
1564 /*
1565 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1566 */
1567
1568 /*
1569 * Name: lck_rw_sleep_with_inheritor
1570 *
1571 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1572 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1573 * be directed to the inheritor specified.
1574 * An interruptible mode and deadline can be specified to return earlier from the wait.
1575 *
1576 * Args:
 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1578 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
1579 * Arg3: event to wait on.
1580 * Arg4: thread to propagate the event push to.
1581 * Arg5: interruptible flag for wait.
1582 * Arg6: deadline for wait.
1583 *
1584 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1585 * Lock will be dropped while waiting.
1586 * The inheritor specified cannot return to user space or exit until another inheritor is specified for the event or a
1587 * wakeup for the event is called.
1588 *
1589 * Returns: result of the wait.
1590 */
1591 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1592 lck_rw_sleep_with_inheritor(
1593 lck_rw_t *lock,
1594 lck_sleep_action_t lck_sleep_action,
1595 event_t event,
1596 thread_t inheritor,
1597 wait_interrupt_t interruptible,
1598 uint64_t deadline)
1599 {
1600 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1601
1602 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1603
1604 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1605 return sleep_with_inheritor_and_turnstile(event,
1606 inheritor,
1607 interruptible,
1608 deadline,
1609 ^{;},
1610 ^{lck_rw_type = lck_rw_done(lock);});
1611 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1612 return sleep_with_inheritor_and_turnstile(event,
1613 inheritor,
1614 interruptible,
1615 deadline,
1616 ^{lck_rw_lock(lock, lck_rw_type);},
1617 ^{lck_rw_type = lck_rw_done(lock);});
1618 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1619 return sleep_with_inheritor_and_turnstile(event,
1620 inheritor,
1621 interruptible,
1622 deadline,
1623 ^{lck_rw_lock_exclusive(lock);},
1624 ^{lck_rw_type = lck_rw_done(lock);});
1625 } else {
1626 return sleep_with_inheritor_and_turnstile(event,
1627 inheritor,
1628 interruptible,
1629 deadline,
1630 ^{lck_rw_lock_shared(lock);},
1631 ^{lck_rw_type = lck_rw_done(lock);});
1632 }
1633 }
1634
1635 /*
1636 * wakeup_with_inheritor functions are independent from the locking primitive.
1637 */
1638
1639 /*
1640 * Name: wakeup_one_with_inheritor
1641 *
1642 * Description: wake up one waiter for event if any. The thread woken up will be the one with the higher sched priority waiting on event.
1643 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
1644 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
1645 *
1646 * Args:
1647 * Arg1: event to wake from.
1648 * Arg2: wait result to pass to the woken up thread.
1649 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
1650 * Arg4: pointer for storing the thread wokenup.
1651 *
1652 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1653 *
1654 * Conditions: The new inheritor wokenup cannot return to user space or exit until another inheritor is specified for the event or a
1655 * wakeup for the event is called.
1656 * A reference for the wokenup thread is acquired.
1657 * NOTE: this cannot be called from interrupt context.
1658 */
kern_return_t
wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
{
	/*
	 * Wake the highest-priority waiter on `event`.  With LCK_WAKE_DEFAULT
	 * the push is transferred to the woken thread; when `thread_wokenup`
	 * is non-NULL a reference to that thread is returned through it.
	 */
	return wakeup_with_inheritor_and_turnstile(event,
	           result,
	           TRUE,
	           action,
	           thread_wokenup);
}
1668
1669 /*
1670 * Name: wakeup_all_with_inheritor
1671 *
1672 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
1673 *
1674 * Args:
1675 * Arg1: event to wake from.
1676 * Arg2: wait result to pass to the woken up threads.
1677 *
1678 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1679 *
1680 * Conditions: NOTE: this cannot be called from interrupt context.
1681 */
1682 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1683 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1684 {
1685 return wakeup_with_inheritor_and_turnstile(event,
1686 result,
1687 FALSE,
1688 0,
1689 NULL);
1690 }
1691
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	/*
	 * Debugger (kdp) callback: report the inheritor of a
	 * sleep_with_inheritor turnstile as the "owner" a blocked thread is
	 * waiting on.  Runs with the system stopped, so no locks are taken.
	 */
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	waitinfo->owner = 0;
	waitinfo->context = 0;

	/*
	 * If the interlock is held, the inheritor field may be mid-update:
	 * report no owner rather than a possibly-stale thread id.
	 */
	if (waitq_held(waitq)) {
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1708
/*
 * Layout invariants for the cond_sleep_with_inheritor KPI: the owner
 * field must be exactly wide enough for a ctid, and the 32/64-bit
 * condition variants must overlay a plain integer of the same size.
 */
static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1712
/*
 * Routine: cond_sleep_with_inheritor_and_turnstile_type
 *
 * Common backend for the cond_sleep_with_inheritor{32,64}[_mask]
 * variants.  `cond_sleep_check` atomically re-validates the caller's
 * observed condition under the turnstile interlock and, on success,
 * reports the ctid of the thread that should inherit the push.
 *
 * Returns THREAD_NOT_WAITING if the condition check fails, otherwise
 * the result of the wait.
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Retake the bucket lock to tear down our use of the hashed turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1779
1780 /*
1781 * Name: cond_sleep_with_inheritor32_mask
1782 *
1783 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1784 * Allows a thread to conditionally sleep while indicating which thread should
1785 * inherit the priority push associated with the condition.
1786 * The condition should be expressed through a cond_swi_var32_s pointer.
1787 * The condition needs to be populated by the caller with the ctid of the
1788 * thread that should inherit the push. The remaining bits of the condition
1789 * can be used by the caller to implement its own synchronization logic.
1790 * A copy of the condition value observed by the caller when it decided to call
1791 * this function should be provided to prevent races with matching wakeups.
1792 * This function will atomically check the value stored in the condition against
1793 * the expected/observed one provided only for the bits that are set in the mask.
1794 * If the check doesn't pass the thread will not sleep and the function will return.
1795 * The ctid provided in the condition will be used only after a successful
1796 * check.
1797 *
1798 * Args:
1799 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1800 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1801 * Arg3: mask to apply to the condition to check.
1802 * Arg4: interruptible flag for wait.
1803 * Arg5: deadline for wait.
1804 *
1805 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1806 * wakeup for the cond is called.
1807 *
1808 * Returns: result of the wait.
1809 */
1810 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1811 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1812 {
1813 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1814 cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1815 bool ret;
1816 if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1817 ret = true;
1818 *ctid = cond_val.cond32_owner;
1819 } else {
1820 ret = false;
1821 }
1822 return ret;
1823 };
1824
1825 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1826 }
1827
1828 /*
1829 * Name: cond_sleep_with_inheritor64_mask
1830 *
1831 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1832 * Allows a thread to conditionally sleep while indicating which thread should
1833 * inherit the priority push associated with the condition.
1834 * The condition should be expressed through a cond_swi_var64_s pointer.
1835 * The condition needs to be populated by the caller with the ctid of the
1836 * thread that should inherit the push. The remaining bits of the condition
1837 * can be used by the caller to implement its own synchronization logic.
1838 * A copy of the condition value observed by the caller when it decided to call
1839 * this function should be provided to prevent races with matching wakeups.
1840 * This function will atomically check the value stored in the condition against
1841 * the expected/observed one provided only for the bits that are set in the mask.
1842 * If the check doesn't pass the thread will not sleep and the function will return.
1843 * The ctid provided in the condition will be used only after a successful
1844 * check.
1845 *
1846 * Args:
1847 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1848 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1849 * Arg3: mask to apply to the condition to check.
1850 * Arg4: interruptible flag for wait.
1851 * Arg5: deadline for wait.
1852 *
1853 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1854 * wakeup for the cond is called.
1855 *
1856 * Returns: result of the wait.
1857 */
1858 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1859 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1860 {
1861 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1862 cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1863 bool ret;
1864 if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1865 ret = true;
1866 *ctid = cond_val.cond64_owner;
1867 } else {
1868 ret = false;
1869 }
1870 return ret;
1871 };
1872
1873 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1874 }
1875
1876 /*
1877 * Name: cond_sleep_with_inheritor32
1878 *
1879 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1880 * Allows a thread to conditionally sleep while indicating which thread should
1881 * inherit the priority push associated with the condition.
1882 * The condition should be expressed through a cond_swi_var32_s pointer.
1883 * The condition needs to be populated by the caller with the ctid of the
1884 * thread that should inherit the push. The remaining bits of the condition
1885 * can be used by the caller to implement its own synchronization logic.
1886 * A copy of the condition value observed by the caller when it decided to call
1887 * this function should be provided to prevent races with matching wakeups.
1888 * This function will atomically check the value stored in the condition against
1889 * the expected/observed one provided. If the check doesn't pass the thread will not
1890 * sleep and the function will return.
1891 * The ctid provided in the condition will be used only after a successful
1892 * check.
1893 *
1894 * Args:
1895 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1896 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1897 * Arg3: interruptible flag for wait.
1898 * Arg4: deadline for wait.
1899 *
1900 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1901 * wakeup for the cond is called.
1902 *
1903 * Returns: result of the wait.
1904 */
1905 wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond,cond_swi_var32_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1906 cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1907 {
1908 return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
1909 }
1910
1911 /*
1912 * Name: cond_sleep_with_inheritor64
1913 *
1914 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1915 * Allows a thread to conditionally sleep while indicating which thread should
1916 * inherit the priority push associated with the condition.
1917 * The condition should be expressed through a cond_swi_var64_s pointer.
1918 * The condition needs to be populated by the caller with the ctid of the
1919 * thread that should inherit the push. The remaining bits of the condition
1920 * can be used by the caller to implement its own synchronization logic.
1921 * A copy of the condition value observed by the caller when it decided to call
1922 * this function should be provided to prevent races with matching wakeups.
1923 * This function will atomically check the value stored in the condition against
1924 * the expected/observed one provided. If the check doesn't pass the thread will not
1925 * sleep and the function will return.
1926 * The ctid provided in the condition will be used only after a successful
1927 * check.
1928 *
1929 * Args:
1930 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1931 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1932 * Arg3: interruptible flag for wait.
1933 * Arg4: deadline for wait.
1934 *
1935 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1936 * wakeup for the cond is called.
1937 *
1938 * Returns: result of the wait.
1939 */
1940 wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond,cond_swi_var64_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1941 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1942 {
1943 return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1944 }
1945
1946 /*
1947 * Name: cond_wakeup_one_with_inheritor
1948 *
1949 * Description: Wake up one waiter waiting on the condition (if any).
1950 * The thread woken up will be the one with the higher sched priority waiting on the condition.
1951 * The push for the condition will be transferred from the last inheritor to the woken up thread.
1952 *
1953 * Args:
1954 * Arg1: condition to wake from.
1955 * Arg2: wait result to pass to the woken up thread.
 * Arg3: pointer for storing the thread woken up.
1957 *
1958 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1959 *
 * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
 *             condition or a wakeup for the event is called.
 *             A reference on the woken-up thread is acquired.
1963 * NOTE: this cannot be called from interrupt context.
1964 */
1965 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1966 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1967 {
1968 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1969 result,
1970 TRUE,
1971 action,
1972 thread_wokenup);
1973 }
1974
1975 /*
1976 * Name: cond_wakeup_all_with_inheritor
1977 *
1978 * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1979 *
1980 * Args:
1981 * Arg1: condition to wake from.
1982 * Arg2: wait result to pass to the woken up threads.
1983 *
1984 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1985 *
1986 * Conditions: NOTE: this cannot be called from interrupt context.
1987 */
1988 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1989 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1990 {
1991 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1992 result,
1993 FALSE,
1994 0,
1995 NULL);
1996 }
1997
1998
#pragma mark - gates

/* Type tag stored in gt_type; used to detect an uninitialized/corrupt gate. */
#define GATE_TYPE 3
/* Bit positions of the flag bits packed into the low bits of gt_data. */
#define GATE_ILOCK_BIT 0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)     /* gate interlock (spin) bit */
#define GATE_WAITERS (1 << GATE_WAITERS_BIT) /* at least one waiter present */

/* Spin-lock/unlock the gate via the interlock bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
/* Compiler-ordered whole-state load/store (interlock provides exclusion). */
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* The non-flag bits of gt_data hold the holder thread pointer. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel (masked) state marking a destroyed gate. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

/* A gate's wait event is its own address. */
#define GATE_EVENT(gate) ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred-work callback (turnstile_cleanup) returned by some helpers to be
 * invoked after the gate interlock is dropped. */
typedef void (*void_func_void)(void);
2025
__abortlike
static void
/* Panic: the gate's type tag does not match GATE_TYPE (corrupt or
 * uninitialized gate). */
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2032
__abortlike
static void
/* Panic: the gate was used after being destroyed (state == GATE_DESTROYED). */
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2039
/*
 * Sanity-checks a gate before use: panics if the type tag is wrong,
 * panics if the gate was already destroyed, and asserts that at least
 * one reference is outstanding.
 */
static void
gate_verify(gate_t *gate)
{
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	assert(gate->gt_refs > 0);
}
2052
__abortlike
static void
/* Panic: attempt to close a gate that already has a holder. */
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
2059
2060 static kern_return_t
gate_try_close(gate_t * gate)2061 gate_try_close(gate_t *gate)
2062 {
2063 uintptr_t state;
2064 thread_t holder;
2065 kern_return_t ret;
2066 thread_t thread = current_thread();
2067
2068 gate_verify(gate);
2069
2070 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2071 return KERN_SUCCESS;
2072 }
2073
2074 gate_ilock(gate);
2075 state = ordered_load_gate(gate);
2076 holder = GATE_STATE_TO_THREAD(state);
2077
2078 if (holder == NULL) {
2079 assert(gate_has_waiter_bit(state) == FALSE);
2080
2081 state = GATE_THREAD_TO_STATE(current_thread());
2082 state |= GATE_ILOCK;
2083 ordered_store_gate(gate, state);
2084 ret = KERN_SUCCESS;
2085 } else {
2086 if (holder == current_thread()) {
2087 gate_already_owned_panic(gate, holder);
2088 }
2089 ret = KERN_FAILURE;
2090 }
2091
2092 gate_iunlock(gate);
2093 return ret;
2094 }
2095
/*
 * Closes the gate, making the current thread the holder.
 * Panics if the gate is already closed (by anyone, including the caller).
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: gate fully open; claim it with one acquire cmpxchg. */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* An open gate cannot have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Become the holder; keep the interlock bit until gate_iunlock(). */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2125
/*
 * Wakes up all threads waiting on the gate's turnstile and updates the
 * turnstile inheritor as part of the wakeup.
 * Called with the gate interlock held, only when the waiter bit is set.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2146
__abortlike
static void
/* Panic: open/handoff attempted by a thread that is not the holder.
 * `open` selects which message to print. */
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2157
/*
 * Opens the gate, waking up any waiters.
 * Panics if the current thread is not the holder.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: held by us with no waiters; release with one cmpxchg. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiter bits; interlock bit cleared by gate_iunlock. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2189
/*
 * Wakes up the highest priority waiter of the gate and transfers the
 * turnstile push to it. Called with the gate interlock held and the
 * waiter bit set.
 *
 * On KERN_SUCCESS, *thread_woken_up holds the (referenced) new holder and
 * *waiters reports whether more waiters remain. Returns KERN_NOT_WAITING
 * when no waiter was found on the turnstile.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding the operation altogether.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * Waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just woken-up thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2253
/*
 * Hands the gate off to the highest priority waiter (who becomes the new
 * holder), or — with GATE_HANDOFF_OPEN_IF_NO_WAITERS — opens the gate when
 * there are no waiters. Panics if the caller is not the holder.
 *
 * Returns KERN_SUCCESS on a successful handoff, KERN_NOT_WAITING when no
 * waiter was available (gate opened or kept closed depending on flags).
 */
static kern_return_t
gate_handoff(gate_t *gate,
    int flags)
{
	kern_return_t ret;
	thread_t new_holder = NULL;
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
	gate_verify(gate);

	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
		/* Fast path: held by us with no waiters; open with one cmpxchg. */
		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
			return KERN_NOT_WAITING;
		}
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != current_thread()) {
		gate_not_owned_panic(gate, holder, false);
	}

	if (waiters) {
		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
		if (ret == KERN_SUCCESS) {
			/* The woken thread is the new holder. */
			state = GATE_THREAD_TO_STATE(new_holder);
			if (waiters) {
				state |= GATE_WAITERS;
			}
		} else {
			/* No waiter actually found; open only if requested. */
			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
				state = 0;
			}
		}
	} else {
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			state = 0;
		}
		ret = KERN_NOT_WAITING;
	}
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);

	/* Drop the reference waitq_wakeup64_identify took on the woken thread. */
	if (new_holder) {
		thread_deallocate(new_holder);
	}
	return ret;
}
2312
/*
 * Moves the turnstile push of the gate to new_inheritor (the stealing
 * thread). Called with the gate interlock held.
 *
 * Returns turnstile_cleanup, which the caller must invoke after
 * dropping the gate interlock.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2331
__abortlike
static void
/* Panic: wait/steal attempted on a gate that has no holder.
 * `wait` selects which message to print. */
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2342
/*
 * Forcibly takes ownership of a closed gate away from its current holder;
 * the current thread becomes the new holder (and, if there are waiters,
 * the new turnstile inheritor). Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	/* Deferred turnstile_cleanup to run after releasing the interlock. */
	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep the waiter bit and redirect the push to the new holder. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2377
/*
 * Blocks the current thread on the gate's turnstile, pushing on `holder`.
 * Entered with the gate interlock held; drops it across the block and
 * re-acquires it before returning.
 *
 * Outputs the thread_block() result in *wait and whether the turnstile
 * still has waiters in *waiters. Returns turnstile_cleanup, which the
 * caller must invoke after dropping the gate interlock.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) || // thread interrupted or timedout
		    holder == current_thread())) { // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2431
/* Returns a gate allocated by gate_alloc_init() to its zone. */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2437
__abortlike
static void
/* Panic: gt_refs would overflow UINT16_MAX (too many concurrent waiters). */
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2444
/*
 * Waits on a closed gate, temporarily dropping the primitive lock that
 * protects it (released via primitive_unlock, re-taken via primitive_lock
 * before returning). Panics if the gate is open.
 *
 * Returns GATE_HANDOFF if the gate was handed to this thread,
 * GATE_OPENED if the gate was opened (or freed under us),
 * GATE_INTERRUPTED / GATE_TIMED_OUT on interrupted or expired waits.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	/* Blocks; returns with the gate interlock re-held. */
	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Keep the waiter bit in sync with the turnstile's view. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken before sleeping. */
	assert(gate->gt_refs > 0);
	/* NOTE(review): local is uint32_t while gt_refs is 16-bit elsewhere
	 * (gate_destroy_internal uses uint16_t) — harmless, but inconsistent. */
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate may have been destroyed/freed while we slept. */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			/* Last waiter out frees the heap-allocated gate. */
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2568
2569 static void
gate_assert(gate_t * gate,int flags)2570 gate_assert(gate_t *gate, int flags)
2571 {
2572 uintptr_t state;
2573 thread_t holder;
2574
2575 gate_verify(gate);
2576
2577 gate_ilock(gate);
2578 state = ordered_load_gate(gate);
2579 holder = GATE_STATE_TO_THREAD(state);
2580
2581 switch (flags) {
2582 case GATE_ASSERT_CLOSED:
2583 assert(holder != NULL);
2584 break;
2585 case GATE_ASSERT_OPEN:
2586 assert(holder == NULL);
2587 break;
2588 case GATE_ASSERT_HELD:
2589 assert(holder == current_thread());
2590 break;
2591 default:
2592 panic("invalid %s flag %d", __func__, flags);
2593 }
2594
2595 gate_iunlock(gate);
2596 }
2597
/* gate_init() type: GT_INIT_ALLOC marks a heap-allocated gate so that it is
 * zfree'd when the last reference is dropped. */
enum {
	GT_INIT_DEFAULT = 0,
	GT_INIT_ALLOC
};
2602
2603 static void
gate_init(gate_t * gate,uint type)2604 gate_init(gate_t *gate, uint type)
2605 {
2606 bzero(gate, sizeof(gate_t));
2607
2608 gate->gt_data = 0;
2609 gate->gt_turnstile = NULL;
2610 gate->gt_refs = 1;
2611 switch (type) {
2612 case GT_INIT_ALLOC:
2613 gate->gt_alloc = 1;
2614 break;
2615 default:
2616 gate->gt_alloc = 0;
2617 break;
2618 }
2619 gate->gt_type = GATE_TYPE;
2620 gate->gt_flags_pad = 0;
2621 }
2622
2623 static gate_t*
gate_alloc_init(void)2624 gate_alloc_init(void)
2625 {
2626 gate_t *gate;
2627 gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2628 gate_init(gate, GT_INIT_ALLOC);
2629 return gate;
2630 }
2631
__abortlike
static void
/* Panic: destroying a gate that still has a holder. */
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2638
__abortlike
static void
/* Panic: destroying a gate that still has waiters. */
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2645
/*
 * Marks the gate destroyed and drops the init reference.
 * Panics if the gate is still held or still has waiters.
 * Returns the remaining reference count (0 means no woken-up waiter
 * still needs the gate and it may be freed by the caller).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still needs
	 * to be available to let the
	 * last woken-up threads clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2686
__abortlike
static void
/* Panic: gate_destroy() called on a heap-allocated gate (needs gate_free). */
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2693
2694 static void
gate_destroy(gate_t * gate)2695 gate_destroy(gate_t *gate)
2696 {
2697 gate_verify(gate);
2698 if (gate->gt_alloc == 1) {
2699 gate_destroy_panic(gate);
2700 }
2701 gate_destroy_internal(gate);
2702 }
2703
__abortlike
static void
/* Panic: gate_free() called on a gate not created by gate_alloc_init(). */
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2710
/*
 * Destroys and (when no woken-up waiters still reference it) frees a gate
 * created by gate_alloc_init(). Panics on an embedded gate.
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2734
2735 /*
2736 * Name: lck_rw_gate_init
2737 *
2738 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2739 *
2740 * Args:
2741 * Arg1: lck_rw_t lock used to protect the gate.
2742 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2743 */
2744 void
lck_rw_gate_init(lck_rw_t * lock,gate_t * gate)2745 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2746 {
2747 (void) lock;
2748 gate_init(gate, GT_INIT_DEFAULT);
2749 }
2750
2751 /*
2752 * Name: lck_rw_gate_alloc_init
2753 *
2754 * Description: allocates and initializes a gate_t.
2755 *
2756 * Args:
2757 * Arg1: lck_rw_t lock used to protect the gate.
2758 *
2759 * Returns:
2760 * gate_t allocated.
2761 */
2762 gate_t*
lck_rw_gate_alloc_init(lck_rw_t * lock)2763 lck_rw_gate_alloc_init(lck_rw_t *lock)
2764 {
2765 (void) lock;
2766 return gate_alloc_init();
2767 }
2768
2769 /*
2770 * Name: lck_rw_gate_destroy
2771 *
2772 * Description: destroys a variable previously initialized
2773 * with lck_rw_gate_init().
2774 *
2775 * Args:
2776 * Arg1: lck_rw_t lock used to protect the gate.
2777 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2778 */
2779 void
lck_rw_gate_destroy(lck_rw_t * lock,gate_t * gate)2780 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2781 {
2782 (void) lock;
2783 gate_destroy(gate);
2784 }
2785
2786 /*
2787 * Name: lck_rw_gate_free
2788 *
2789 * Description: destroys and tries to free a gate previously allocated
2790 * with lck_rw_gate_alloc_init().
2791 * The gate free might be delegated to the last thread returning
2792 * from the gate_wait().
2793 *
2794 * Args:
2795 * Arg1: lck_rw_t lock used to protect the gate.
2796 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2797 */
2798 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2799 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2800 {
2801 (void) lock;
2802 gate_free(gate);
2803 }
2804
2805 /*
2806 * Name: lck_rw_gate_try_close
2807 *
2808 * Description: Tries to close the gate.
2809 * In case of success the current thread will be set as
2810 * the holder of the gate.
2811 *
2812 * Args:
2813 * Arg1: lck_rw_t lock used to protect the gate.
2814 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2815 *
2816 * Conditions: Lock must be held. Returns with the lock held.
2817 *
2818 * Returns:
2819 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2820 * of the gate.
2821 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2822 * to wake up possible waiters on the gate before returning to userspace.
2823 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2824 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2825 *
2826 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2827 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2828 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2829 * be done without dropping the lock that is protecting the gate in between.
2830 */
2831 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2832 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2833 {
2834 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2835
2836 return gate_try_close(gate);
2837 }
2838
2839 /*
2840 * Name: lck_rw_gate_close
2841 *
2842 * Description: Closes the gate. The current thread will be set as
2843 * the holder of the gate. Will panic if the gate is already closed.
2844 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2845 * to wake up possible waiters on the gate before returning to userspace.
2846 *
2847 * Args:
2848 * Arg1: lck_rw_t lock used to protect the gate.
2849 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2850 *
2851 * Conditions: Lock must be held. Returns with the lock held.
2852 * The gate must be open.
2853 *
2854 */
2855 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2856 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2857 {
2858 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2859
2860 return gate_close(gate);
2861 }
2862
2863 /*
2864 * Name: lck_rw_gate_open
2865 *
2866 * Description: Opens the gate and wakes up possible waiters.
2867 *
2868 * Args:
2869 * Arg1: lck_rw_t lock used to protect the gate.
2870 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2871 *
2872 * Conditions: Lock must be held. Returns with the lock held.
2873 * The current thread must be the holder of the gate.
2874 *
2875 */
2876 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2877 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2878 {
2879 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2880
2881 gate_open(gate);
2882 }
2883
2884 /*
2885 * Name: lck_rw_gate_handoff
2886 *
2887 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2888 * priority will be selected as the new holder of the gate, and woken up,
2889 * with the gate remaining in the closed state throughout.
2890 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2891 * will be returned.
2892 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2893 * case no waiters were found.
2894 *
2895 *
2896 * Args:
2897 * Arg1: lck_rw_t lock used to protect the gate.
2898 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2899 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2900 *
2901 * Conditions: Lock must be held. Returns with the lock held.
2902 * The current thread must be the holder of the gate.
2903 *
2904 * Returns:
2905 * KERN_SUCCESS in case one of the waiters became the new holder.
2906 * KERN_NOT_WAITING in case there were no waiters.
2907 *
2908 */
2909 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2910 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2911 {
2912 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2913
2914 return gate_handoff(gate, flags);
2915 }
2916
2917 /*
2918 * Name: lck_rw_gate_steal
2919 *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 * new holder of the gate.
2922 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2923 * to wake up possible waiters on the gate before returning to userspace.
2924 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2925 * anymore.
2926 *
2927 *
2928 * Args:
2929 * Arg1: lck_rw_t lock used to protect the gate.
2930 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2931 *
2932 * Conditions: Lock must be held. Returns with the lock held.
2933 * The gate must be closed and the current thread must not already be the holder.
2934 *
2935 */
2936 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2937 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2938 {
2939 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2940
2941 gate_steal(gate);
2942 }
2943
2944 /*
2945 * Name: lck_rw_gate_wait
2946 *
2947 * Description: Waits for the current thread to become the holder of the gate or for the
2948 * gate to become open. An interruptible mode and deadline can be specified
2949 * to return earlier from the wait.
2950 *
2951 * Args:
2952 * Arg1: lck_rw_t lock used to protect the gate.
2953 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
2957 *
2958 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2959 * Lock will be dropped while waiting.
2960 * The gate must be closed.
2961 *
2962 * Returns: Reason why the thread was woken up.
2963 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2964 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
2965 * to wake up possible waiters on the gate before returning to userspace.
2966 * GATE_OPENED - the gate was opened by the holder.
2967 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2968 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2969 */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * __block so the "drop the lock" block below can record whether the
	 * lock was held shared or exclusive; the default "reacquire" block
	 * uses that to retake the lock in the same mode.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * gate_wait() is handed two blocks: the first drops the lock before
	 * sleeping, the second reacquires it after wakeup.  The branch order
	 * defines flag precedence: LCK_SLEEP_UNLOCK wins over any relock
	 * mode, then "no mode requested" (relock in the original mode), then
	 * LCK_SLEEP_EXCLUSIVE, otherwise LCK_SLEEP_SHARED.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants the lock left dropped: empty reacquire block. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* No explicit mode: retake the lock the way it was held. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
3003
3004 /*
3005 * Name: lck_rw_gate_assert
3006 *
3007 * Description: asserts that the gate is in the specified state.
3008 *
3009 * Args:
3010 * Arg1: lck_rw_t lock used to protect the gate.
3011 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3012 * Arg3: flags to specified assert type.
3013 * GATE_ASSERT_CLOSED - the gate is currently closed
3014 * GATE_ASSERT_OPEN - the gate is currently opened
3015 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3016 */
3017 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)3018 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
3019 {
3020 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3021
3022 gate_assert(gate, flags);
3023 return;
3024 }
3025
3026 /*
3027 * Name: lck_mtx_gate_init
3028 *
3029 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
3030 *
3031 * Args:
3032 * Arg1: lck_mtx_t lock used to protect the gate.
3033 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3034 */
3035 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)3036 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
3037 {
3038 (void) lock;
3039 gate_init(gate, GT_INIT_DEFAULT);
3040 }
3041
3042 /*
3043 * Name: lck_mtx_gate_alloc_init
3044 *
3045 * Description: allocates and initializes a gate_t.
3046 *
3047 * Args:
3048 * Arg1: lck_mtx_t lock used to protect the gate.
3049 *
3050 * Returns:
3051 * gate_t allocated.
3052 */
3053 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)3054 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3055 {
3056 (void) lock;
3057 return gate_alloc_init();
3058 }
3059
3060 /*
3061 * Name: lck_mtx_gate_destroy
3062 *
3063 * Description: destroys a variable previously initialized
3064 * with lck_mtx_gate_init().
3065 *
3066 * Args:
3067 * Arg1: lck_mtx_t lock used to protect the gate.
3068 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3069 */
3070 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)3071 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3072 {
3073 (void) lock;
3074 gate_destroy(gate);
3075 }
3076
3077 /*
3078 * Name: lck_mtx_gate_free
3079 *
3080 * Description: destroys and tries to free a gate previously allocated
3081 * with lck_mtx_gate_alloc_init().
3082 * The gate free might be delegated to the last thread returning
3083 * from the gate_wait().
3084 *
3085 * Args:
3086 * Arg1: lck_mtx_t lock used to protect the gate.
 * Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3088 */
3089 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3090 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3091 {
3092 (void) lock;
3093 gate_free(gate);
3094 }
3095
3096 /*
3097 * Name: lck_mtx_gate_try_close
3098 *
3099 * Description: Tries to close the gate.
3100 * In case of success the current thread will be set as
3101 * the holder of the gate.
3102 *
3103 * Args:
3104 * Arg1: lck_mtx_t lock used to protect the gate.
3105 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3106 *
3107 * Conditions: Lock must be held. Returns with the lock held.
3108 *
3109 * Returns:
3110 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3111 * of the gate.
3112 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3113 * to wake up possible waiters on the gate before returning to userspace.
3114 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3115 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3116 *
3117 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3118 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3119 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3120 * be done without dropping the lock that is protecting the gate in between.
3121 */
3122 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3123 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3124 {
3125 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3126
3127 return gate_try_close(gate);
3128 }
3129
3130 /*
3131 * Name: lck_mtx_gate_close
3132 *
3133 * Description: Closes the gate. The current thread will be set as
3134 * the holder of the gate. Will panic if the gate is already closed.
3135 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3136 * to wake up possible waiters on the gate before returning to userspace.
3137 *
3138 * Args:
3139 * Arg1: lck_mtx_t lock used to protect the gate.
3140 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3141 *
3142 * Conditions: Lock must be held. Returns with the lock held.
3143 * The gate must be open.
3144 *
3145 */
3146 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3147 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3148 {
3149 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3150
3151 return gate_close(gate);
3152 }
3153
3154 /*
3155 * Name: lck_mtx_gate_open
3156 *
 * Description: Opens the gate and wakes up possible waiters.
3158 *
3159 * Args:
3160 * Arg1: lck_mtx_t lock used to protect the gate.
3161 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3162 *
3163 * Conditions: Lock must be held. Returns with the lock held.
3164 * The current thread must be the holder of the gate.
3165 *
3166 */
3167 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3168 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3169 {
3170 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3171
3172 gate_open(gate);
3173 }
3174
3175 /*
3176 * Name: lck_mtx_gate_handoff
3177 *
3178 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3179 * priority will be selected as the new holder of the gate, and woken up,
3180 * with the gate remaining in the closed state throughout.
3181 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3182 * will be returned.
3183 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3184 * case no waiters were found.
3185 *
3186 *
3187 * Args:
3188 * Arg1: lck_mtx_t lock used to protect the gate.
3189 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3190 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3191 *
3192 * Conditions: Lock must be held. Returns with the lock held.
3193 * The current thread must be the holder of the gate.
3194 *
3195 * Returns:
3196 * KERN_SUCCESS in case one of the waiters became the new holder.
3197 * KERN_NOT_WAITING in case there were no waiters.
3198 *
3199 */
3200 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3201 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3202 {
3203 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3204
3205 return gate_handoff(gate, flags);
3206 }
3207
3208 /*
3209 * Name: lck_mtx_gate_steal
3210 *
3211 * Description: Steals the ownership of the gate. It sets the current thread as the
3212 * new holder of the gate.
3213 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3214 * to wake up possible waiters on the gate before returning to userspace.
3215 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3216 * anymore.
3217 *
3218 *
3219 * Args:
3220 * Arg1: lck_mtx_t lock used to protect the gate.
3221 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3222 *
3223 * Conditions: Lock must be held. Returns with the lock held.
3224 * The gate must be closed and the current thread must not already be the holder.
3225 *
3226 */
3227 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3228 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3229 {
3230 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3231
3232 gate_steal(gate);
3233 }
3234
3235 /*
3236 * Name: lck_mtx_gate_wait
3237 *
3238 * Description: Waits for the current thread to become the holder of the gate or for the
3239 * gate to become open. An interruptible mode and deadline can be specified
3240 * to return earlier from the wait.
3241 *
3242 * Args:
3243 * Arg1: lck_mtx_t lock used to protect the gate.
3244 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
3248 *
3249 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3250 * Lock will be dropped while waiting.
3251 * The gate must be closed.
3252 *
3253 * Returns: Reason why the thread was woken up.
3254 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3255 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3256 * to wake up possible waiters on the gate before returning to userspace.
3257 * GATE_OPENED - the gate was opened by the holder.
3258 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3259 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3260 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * gate_wait() is handed two blocks: the first drops the mutex before
	 * sleeping, the second reacquires it after wakeup.  The branch order
	 * defines flag precedence: LCK_SLEEP_UNLOCK (leave the mutex
	 * dropped) wins over LCK_SLEEP_SPIN, which wins over
	 * LCK_SLEEP_SPIN_ALWAYS; the default reacquires with a full
	 * lck_mtx_lock().
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants the mutex left dropped: empty reacquire block. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3292
3293 /*
3294 * Name: lck_mtx_gate_assert
3295 *
3296 * Description: asserts that the gate is in the specified state.
3297 *
3298 * Args:
3299 * Arg1: lck_mtx_t lock used to protect the gate.
3300 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3301 * Arg3: flags to specified assert type.
3302 * GATE_ASSERT_CLOSED - the gate is currently closed
3303 * GATE_ASSERT_OPEN - the gate is currently opened
3304 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3305 */
3306 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3307 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3308 {
3309 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3310
3311 gate_assert(gate, flags);
3312 }
3313
3314 #pragma mark - LCK_*_DECLARE support
3315
3316 __startup_func
3317 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3318 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3319 {
3320 lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3321 }
3322
3323 __startup_func
3324 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3325 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3326 {
3327 lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3328 }
3329
3330 __startup_func
3331 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3332 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3333 {
3334 lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3335 }
3336
3337 __startup_func
3338 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3339 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3340 {
3341 simple_lock_init(sp->lck, sp->lck_init_arg);
3342 }
3343
3344 __startup_func
3345 void
lck_ticket_startup_init(struct lck_ticket_startup_spec * sp)3346 lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
3347 {
3348 lck_ticket_init(sp->lck, sp->lck_grp);
3349 }
3350