1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82
83 #include <sys/kdebug.h>
84
/* kdebug trace codes for the lck_mtx_* sleep/wait paths below */
#define LCK_MTX_SLEEP_CODE 0
#define LCK_MTX_SLEEP_DEADLINE_CODE 1
#define LCK_MTX_LCK_WAIT_CODE 2
#define LCK_MTX_UNLCK_WAKEUP_CODE 3

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

#if MACH_LDEBUG
/* Trap if pointer p is not naturally aligned for type t (lock-debug builds). */
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p, t) do{}while(0)
#endif

#define NOINLINE __attribute__((noinline))

/* Load/store of a hw_lock_t's word: compiler barrier only, no CPU fences. */
#define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)

/* Typed allocator for gate_t objects. */
KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

/* Per-CPU scratch record describing an in-progress spinlock timeout. */
struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

/* When true (the default), a spinlock timeout panics instead of spinning on. */
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);

#if CONFIG_PV_TICKET
SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
#endif

/* "lcks" boot-arg; DEBUG kernels enable lock debugging by default. */
#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", LCK_OPTION_ENABLE_DEBUG);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

#if CONFIG_DTRACE
#if defined (__x86_64__)
machine_timeout_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm64__)
MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif

struct lck_mcs PERCPU_DATA(lck_mcs);
134
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 if (__improbable(kdebug_enable)) {
140 return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 } else {
142 return 0;
143 }
144 }
145
/* Out-of-line panic for __lck_require_preemption_disabled() below. */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}

/*
 * Assert that @self currently runs with preemption disabled before it
 * operates on @lock; used by the *_nopreempt lock entry points.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
159
#pragma mark - HW Spin policies

/*
 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	/* apply the policy's scaling shift (e.g. 4x the base timeout) */
	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	/*
	 * When spinning with interrupts masked and the interrupt-masked
	 * watchdog is in panic mode, clamp the spin timeout to the
	 * interrupt-masked timeout so the spin timeout fires first.
	 * (Skipped under KASAN.)
	 */
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		/* convert to TSC units to match the Intel timeout domain */
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
200
/*
 * Whether the spin described by @to runs inside the PPL.  When
 * SCHED_HYGIENE_DEBUG is off the timeout struct doesn't record this,
 * so ask the pmap layer directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
212
/*
 * Decide whether a contended spin loop should keep going.
 *
 * The deadline in @state is armed lazily on the first call that gets
 * past the cheap "now < deadline" test; subsequent expirations invoke
 * the policy's hwsp_op_timeout callback, which either panics or asks
 * to continue (deadline is pushed) or to give up.
 *
 * Returns true to keep spinning, false to stop.
 */
bool
hw_spin_should_keep_spinning(
	void *lock,
	hw_spin_policy_t pol,
	hw_spin_timeout_t to,
	hw_spin_state_t *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	if (__improbable(to.hwst_timeout == 0)) {
		/* a zero timeout means "spin forever" */
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		/* first pass: arm the deadline rather than timing out */
		state->hwss_start = now;
		state->hwss_deadline = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	/* hand the policy the address of the enclosing lock object */
	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
267
/*
 * Record, in this CPU's spinlock-timeout scratch record, the lock owner
 * observed when the spin began (raw lock word with low tag bits
 * stripped).  Compiled out on RELEASE kernels.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}

/*
 * Same as above, for locks that store a compact thread id (ctid)
 * instead of a thread pointer.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}
290
/*
 * Called when a spinlock acquisition has timed out, just before the
 * panic: fill this CPU's lck_spinlock_to_info record with the lock
 * address and current owner so the panic log / debugger can find them.
 *
 * @owner is either a raw lock word (low tag bits stripped) or, for
 * small values, a compact thread id that is resolved to a thread
 * pointer.  If the owner is found running on a CPU, its CPU number is
 * recorded; on Intel that CPU is additionally sent a panic NMI so its
 * backtrace lands in the report.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		/* small values are compact thread ids, not pointers */
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	/* publish before any cross-CPU action below */
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
331
332 #pragma mark - HW locks
333
/*
 * Routine: hw_lock_init
 *
 * Initialize a hardware lock.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	/* an unowned hw_lock_t holds 0 in its lock word */
	ordered_store_hw(lock, 0);
}
344
/*
 * Try once to acquire @lock by storing @newval, succeeding only if the
 * lock word is currently 0 (unowned).  Returns true on acquisition
 * (acquire ordering), false otherwise.
 *
 * On LL/SC architectures a failing attempt parks the CPU in
 * wait_for_event() so the caller's retry loop wakes on a store to the
 * lock line instead of busy-reading it.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
			wait_for_event(); // clears the monitor so we don't need give_up()
			return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	/* exclusive peek first, so a held lock doesn't cost a failed RMW */
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
369
/*
 * Try once to set bit @bit of *@target with acquire ordering.
 *
 * Returns true if this caller transitioned the bit from 0 to 1.  On
 * failure, @wait selects the stall behavior: wait for a store to the
 * word (LL/SC monitor) or a simple cpu_pause() for spinning callers,
 * versus immediately clearing the exclusive monitor for try-lock use.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
			if (wait) {
				wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
				os_atomic_clear_exclusive();
			}
#else
			if (wait) {
				cpu_pause();
			}
#endif
			return false;
		}
	});
	return true;
#else
	/* exclusive peek first, so a held bit doesn't cost a failed RMW */
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
409
/*
 * hwsp_op_timeout handler for hw_lock_t: panic with owner diagnostics,
 * unless spinlock_timeout_panic was cleared, in which case the caller
 * is told to keep spinning.  Does not return on the panic paths.
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul;
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

/* Default hw_lock_t policy: spin for lock_panic_timeout, then panic. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name = "hw_lock_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_spin_timeout_panic,
};
454
/* Timeout handler that never panics: report the timeout to the caller. */
static hw_spin_timeout_status_t
hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
#pragma unused(_lock, to, st)
	return HW_LOCK_TIMEOUT_RETURN;
}

/*
 * Bounded-spin policy: give up after 4x (shift 2) the system lock
 * timeout instead of spinning forever.
 * NOTE(review): despite the "[panic]" name, this policy RETURNS on
 * timeout (hw_spin_always_return) — confirm the naming is intended.
 */
const struct hw_spin_policy hw_lock_spin_panic_policy = {
	.hwsp_name = "hw_lock_t[panic]",
#if defined(__x86_64__)
	.hwsp_timeout = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic = &LockTimeOut,
#endif
	.hwsp_timeout_shift = 2,
	.hwsp_op_timeout = hw_spin_always_return,
};
472
#if DEBUG || DEVELOPMENT
/*
 * Test-only timeout (100us, converted at boot) and give-up policy.
 * NOTE(review): hw_lock_test_to is initialized below but the test
 * policy references LockTimeOut/LockTimeOutTSC instead — confirm
 * which timeout the policy is meant to use.
 */
static machine_timeout_t hw_lock_test_to;
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name = "testing policy",
#if defined(__x86_64__)
	.hwsp_timeout = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic = &LockTimeOut,
#endif
	.hwsp_timeout_shift = 2,
	.hwsp_op_timeout = hw_spin_always_return,
};

/* Convert 100us to timebase (TSC on Intel) units once at startup. */
__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
500
/*
 * hwsp_op_timeout handler for bit locks: panic with the raw lock word,
 * unless spinlock_timeout_panic was cleared.
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

/* Default bit-lock policy: spin for lock_panic_timeout, then panic. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};

#if __arm64__
/* Fixed bit-lock policy with a 0x3000000-tick timeout (~2s per the name). */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};
#endif
531
/*
 * Routine: hw_lock_lock_contended
 *
 * Spin until lock is acquired or timeout expires.
 * timeout is in mach_absolute_time ticks. Called with
 * preemption disabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t lock,
	uintptr_t data,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

	/* catch self-deadlock: the lock word already names this thread */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	do {
		/* a burst of cheap attempts between timeout checks */
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
593
/* Timeout handler for hw_wait_while_equals32(): panic unless disabled. */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name = "hw_wait_while_equals32",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals32_panic,
};

/* 64-bit flavor of the handler above. */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name = "hw_wait_while_equals64",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals64_panic,
};
637
/*
 * Spin until *@address differs from @current, under the usual spin
 * timeout/panic machinery; returns the first differing value observed.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint32_t v;

	/* hw_spin_wait_until() writes the observed value into v */
	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}

/* 64-bit variant of hw_wait_while_equals32(). */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint64_t v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
667
/*
 * Common body for the hw_lock_lock/hw_lock_to entry points: one
 * fast-path trylock, then the contended spin loop under @pol.
 * The caller manages preemption state.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t lock,
	thread_t thread,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}
685
/*
 * Routine: hw_lock_lock
 *
 * Acquire lock, spinning until it becomes available,
 * return with preemption disabled.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	/* default policy panics on timeout, so the status can be ignored */
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}

/*
 * Routine: hw_lock_lock_nopreempt
 *
 * Acquire lock, spinning until it becomes available.
 * Caller must already have preemption disabled (panics otherwise).
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
714
/*
 * Routine: hw_lock_to
 *
 * Acquire lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 * preemption disabled.  The result is the hw_lock_status_t widened
 * to unsigned (nonzero == acquired).
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}

/*
 * Routine: hw_lock_to_nopreempt
 *
 * Acquire lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks, called and return with
 * preemption disabled.
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
746
747 __result_use_check
748 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))749 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
750 {
751 if (__probable(lock_cmpxchg(&lock->lock_data, 0,
752 HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
753 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
754 return true;
755 }
756 return false;
757 }
758
759 /*
760 * Routine: hw_lock_try
761 *
762 * returns with preemption disabled on success.
763 */
764 unsigned
765 int
766 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
767 {
768 thread_t thread = current_thread();
769 lock_disable_preemption_for_thread(thread);
770 unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
771 if (!success) {
772 lock_enable_preemption();
773 }
774 return success;
775 }
776
/*
 * Try-lock variant requiring preemption already disabled; the
 * preemption level is unchanged regardless of the outcome.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
785
#if DEBUG || DEVELOPMENT
/*
 * Panic handler for hw_lock_unlock_internal(): the calling thread is
 * releasing a hw_lock_t whose owner field is not the current thread.
 *
 * Fix: the message previously claimed a recursive *lock* attempt
 * (copied from the contended-lock panic in hw_lock_lock_contended);
 * it now describes the actual failure.
 */
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock lock %p it doesn't own",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */
795
/*
 * Routine: hw_lock_unlock
 *
 * Unconditionally release lock, release preemption level.
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	/* development kernels catch releasing a lock we don't own */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	/* release-store publishes all writes made under the lock */
	os_atomic_store(&lock->lock_data, 0, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/* Release @lock and drop the preemption disable taken at lock time. */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}

/* Release @lock; preemption level is left untouched. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
829
830 void
hw_lock_assert(__assert_only hw_lock_t lock,__assert_only unsigned int type)831 hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
832 {
833 #if MACH_ASSERT
834 thread_t thread, holder;
835
836 holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
837 thread = current_thread();
838
839 if (type == LCK_ASSERT_OWNED) {
840 if (holder == 0) {
841 panic("Lock not owned %p = %p", lock, holder);
842 }
843 if (holder != thread) {
844 panic("Lock not owned by current thread %p = %p", lock, holder);
845 }
846 } else if (type == LCK_ASSERT_NOTOWNED) {
847 if (holder != THREAD_NULL && holder == thread) {
848 panic("Lock owned by current thread %p = %p", lock, holder);
849 }
850 } else {
851 panic("hw_lock_assert(): invalid arg (%u)", type);
852 }
853 #endif /* MACH_ASSERT */
854 }
855
856 /*
857 * Routine hw_lock_held, doesn't change preemption state.
858 * N.B. Racy, of course.
859 */
860 unsigned int
hw_lock_held(hw_lock_t lock)861 hw_lock_held(hw_lock_t lock)
862 {
863 return ordered_load_hw(lock) != 0;
864 }
865
/*
 * Slow path for bit locks: spin under @pol until bit @bit of *@lock is
 * acquired or the policy gives up.  Mirrors hw_lock_lock_contended().
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t *lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/*
			 * NOTE(review): hw_lock_trylock_bit() returns bool;
			 * storing it into rc relies on HW_LOCK_ACQUIRED == 1
			 * and HW_LOCK_CONTENDED == 0 — confirm those enum
			 * values in locks_internal.h.
			 */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
909
/*
 * Fast-path bit-lock acquire: one trylock, then the contended path.
 * Returns the hw_lock_status_t widened to unsigned int for the public
 * hw_lock_bit_to() interface.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t *lock,
	unsigned int bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
925
/*
 * Routine: hw_lock_bit_to
 *
 * Acquire bit lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 * preemption disabled.
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t * lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}

/*
 * Routine: hw_lock_bit
 *
 * Acquire bit lock, spinning until it becomes available,
 * return with preemption disabled.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	/* default policy panics on timeout, so the status can be ignored */
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}

/*
 * Routine: hw_lock_bit_nopreempt
 *
 * Acquire bit lock, spinning until it becomes available.
 * Caller must already have preemption disabled (panics otherwise).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
969
970
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 boolean_t success = false;
976
977 _disable_preemption();
978 success = hw_lock_trylock_bit(lock, bit, false);
979 if (!success) {
980 lock_enable_preemption();
981 }
982
983 if (success) {
984 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 }
986
987 return success;
988 }
989
/* Clear bit @bit with release ordering and emit the lockstat probe. */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}

/*
 * Routine: hw_unlock_bit
 *
 * Release spin-lock. The second parameter is the bit number to test and set.
 * Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}

/* Bit-lock release for callers that manage their own preemption state. */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
1018
1019
1020 #pragma mark - lck_*_sleep
1021
/*
 * Routine: lck_spin_sleep_grp
 *
 * Atomically drop @lck and wait on @event; the lock is re-acquired
 * (attributed to @grp) on wakeup unless LCK_SLEEP_UNLOCK was passed.
 * Returns the wait result from thread_block()/assert_wait().
 */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible,
	lck_grp_t *grp)
{
	wait_result_t res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	/* assert_wait() before unlocking, so the wakeup can't be missed */
	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}
1052
/*
 * Routine: lck_spin_sleep
 *
 * Same as lck_spin_sleep_grp() without lock-group attribution.
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1062
1063 /*
1064 * Routine: lck_spin_sleep_deadline
1065 */
1066 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1067 lck_spin_sleep_deadline(
1068 lck_spin_t *lck,
1069 lck_sleep_action_t lck_sleep_action,
1070 event_t event,
1071 wait_interrupt_t interruptible,
1072 uint64_t deadline)
1073 {
1074 wait_result_t res;
1075
1076 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1077 panic("Invalid lock sleep action %x", lck_sleep_action);
1078 }
1079
1080 res = assert_wait_deadline(event, interruptible, deadline);
1081 if (res == THREAD_WAITING) {
1082 lck_spin_unlock(lck);
1083 res = thread_block(THREAD_CONTINUE_NULL);
1084 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1085 lck_spin_lock(lck);
1086 }
1087 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1088 lck_spin_unlock(lck);
1089 }
1090
1091 return res;
1092 }
1093
1094 /*
1095 * Routine: lck_mtx_sleep
1096 */
1097 wait_result_t
lck_mtx_sleep(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible)1098 lck_mtx_sleep(
1099 lck_mtx_t *lck,
1100 lck_sleep_action_t lck_sleep_action,
1101 event_t event,
1102 wait_interrupt_t interruptible)
1103 {
1104 wait_result_t res;
1105 thread_pri_floor_t token;
1106
1107 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1108 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1109
1110 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1111 panic("Invalid lock sleep action %x", lck_sleep_action);
1112 }
1113
1114 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1115 /*
1116 * We get a priority floor
1117 * during the time that this thread is asleep, so that when it
1118 * is re-awakened (and not yet contending on the mutex), it is
1119 * runnable at a reasonably high priority.
1120 */
1121 token = thread_priority_floor_start();
1122 }
1123
1124 res = assert_wait(event, interruptible);
1125 if (res == THREAD_WAITING) {
1126 lck_mtx_unlock(lck);
1127 res = thread_block(THREAD_CONTINUE_NULL);
1128 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1129 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1130 lck_mtx_lock_spin(lck);
1131 } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1132 lck_mtx_lock_spin_always(lck);
1133 } else {
1134 lck_mtx_lock(lck);
1135 }
1136 }
1137 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1138 lck_mtx_unlock(lck);
1139 }
1140
1141 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1142 thread_priority_floor_end(&token);
1143 }
1144
1145 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1146
1147 return res;
1148 }
1149
1150
1151 /*
1152 * Routine: lck_mtx_sleep_deadline
1153 */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 lck_mtx_t *lck,
1157 lck_sleep_action_t lck_sleep_action,
1158 event_t event,
1159 wait_interrupt_t interruptible,
1160 uint64_t deadline)
1161 {
1162 wait_result_t res;
1163 thread_pri_floor_t token;
1164
1165 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167
1168 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 panic("Invalid lock sleep action %x", lck_sleep_action);
1170 }
1171
1172 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 /*
1174 * See lck_mtx_sleep().
1175 */
1176 token = thread_priority_floor_start();
1177 }
1178
1179 res = assert_wait_deadline(event, interruptible, deadline);
1180 if (res == THREAD_WAITING) {
1181 lck_mtx_unlock(lck);
1182 res = thread_block(THREAD_CONTINUE_NULL);
1183 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 lck_mtx_lock_spin(lck);
1186 } else {
1187 lck_mtx_lock(lck);
1188 }
1189 }
1190 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 lck_mtx_unlock(lck);
1192 }
1193
1194 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 thread_priority_floor_end(&token);
1196 }
1197
1198 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199
1200 return res;
1201 }
1202
1203 /*
1204 * sleep_with_inheritor and wakeup_with_inheritor KPI
1205 *
1206 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207 * the latest thread specified as inheritor.
1208 *
1209 * The inheritor management is delegated to the caller, the caller needs to store a thread identifier to provide to this functions to specified upon whom
1210 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore is the caller responsibility to call a
1211 * wakeup_with_inheritor from inheritor before running in userspace or specify another inheritor before letting the old inheritor run in userspace.
1212 *
1213 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214 *
1215 * Turnstile requires a non blocking primitive as interlock to synchronize the turnstile data structure manipulation, threfore sleep_with_inheritor, change_sleep_inheritor and
1216 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
1217 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), an handoff to a non blocking primitive is required before
1218 * invoking any turnstile operation.
1219 *
1220 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the the turnstile kernel hash table bucket
1221 * spinlock as the turnstile interlock. Because we do not want to hold interrupt disabled while holding the bucket interlock a new turnstile kernel hash table
1222 * is instantiated for this KPI to manage the hash without interrupt disabled.
1223 * Also:
1224 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225 * - every event will have a dedicated wait_queue.
1226 *
1227 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229 */
1230
/*
 * Routine: wakeup_with_inheritor_and_turnstile
 *
 * Wake one (the highest sched-priority) or all threads waiting on `event`
 * via the turnstile kernel hash table, passing `result` as their wait
 * result.  For single wakeups, `action` decides whether the push is
 * transferred to the woken thread (LCK_WAKE_DEFAULT) or dropped
 * (LCK_WAKE_DO_NOT_TRANSFER_PUSH).  If `thread_wokenup` is non-NULL the
 * woken thread is identified (with a reference) and returned through it.
 *
 * Returns KERN_SUCCESS if a thread was woken, KERN_NOT_WAITING otherwise.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t event,
	wait_result_t result,
	bool wake_one,
	lck_wake_action_t action,
	thread_t *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (wake_one) {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		if (thread_wokenup) {
			thread_t wokeup;

			wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
			*thread_wokenup = wokeup;
			ret = wokeup ? KERN_SUCCESS : KERN_NOT_WAITING;
		} else {
			ret = waitq_wakeup64_one(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
		}
		if (ret == KERN_SUCCESS && action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
			/* push stays where it was; skip the inheritor-chain update */
			goto complete;
		}
		if (ret == KERN_NOT_WAITING) {
			/* nobody was waiting: clear the inheritor so the push is dropped */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding
	 * the interlock.  In that case the new inheritor is either NULL, or a
	 * thread that has just been woken up and has not blocked yet because
	 * it is racing with the same interlock used here after the wait —
	 * so there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor, the old
	 * inheritor can be blocked and require a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have
	 * user turnstiles chained after them.
	 *
	 * We could optimize this by asking the turnstile whether the old
	 * inheritor needs an update, and drop the lock only in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1319
/*
 * Routine: sleep_with_inheritor_and_turnstile
 *
 * Core of the sleep_with_inheritor KPI: sleep on `event` while pushing
 * the priority of the sleeping thread onto `inheritor`, handing off from
 * the caller's locking primitive to the turnstile hash-bucket spinlock
 * used as the turnstile interlock.
 *
 * `primitive_unlock` is a block that releases the caller's primitive
 * (invoked after the bucket interlock is taken); `primitive_lock`
 * reacquires it before returning (a no-op block for LCK_SLEEP_UNLOCK
 * callers).
 *
 * Returns the wait result from waitq_assert_wait64()/thread_block().
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline,
	void (^primitive_lock)(void),
	void (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	/* let the debugger attribute this wait to sleep_with_inheritor */
	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* hand the caller's primitive back before returning */
	primitive_lock();

	return ret;
}
1376
1377 /*
1378 * change_sleep_inheritor is independent from the locking primitive.
1379 */
1380
1381 /*
1382 * Name: change_sleep_inheritor
1383 *
1384 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1385 *
1386 * Args:
1387 * Arg1: event to redirect the push.
1388 * Arg2: new inheritor for event.
1389 *
1390 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1391 *
1392 * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1393 * wakeup for the event is called.
1394 * NOTE: this cannot be called from interrupt context.
1395 */
1396 kern_return_t
change_sleep_inheritor(event_t event,thread_t inheritor)1397 change_sleep_inheritor(event_t event, thread_t inheritor)
1398 {
1399 uint32_t index;
1400 struct turnstile *ts = NULL;
1401 kern_return_t ret = KERN_SUCCESS;
1402 turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
1403
1404 /*
1405 * the hash bucket spinlock is used as turnstile interlock
1406 */
1407 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1408
1409 ts = turnstile_prepare_hash((uintptr_t)event, type);
1410
1411 if (!turnstile_has_waiters(ts)) {
1412 ret = KERN_NOT_WAITING;
1413 }
1414
1415 /*
1416 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1417 */
1418 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1419
1420 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1421
1422 /*
1423 * update the chains outside the interlock
1424 */
1425 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1426
1427 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1428
1429 turnstile_complete_hash((uintptr_t)event, type);
1430
1431 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1432
1433 turnstile_cleanup();
1434
1435 return ret;
1436 }
1437
1438 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1439 lck_spin_sleep_with_inheritor(
1440 lck_spin_t *lock,
1441 lck_sleep_action_t lck_sleep_action,
1442 event_t event,
1443 thread_t inheritor,
1444 wait_interrupt_t interruptible,
1445 uint64_t deadline)
1446 {
1447 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1448 return sleep_with_inheritor_and_turnstile(event, inheritor,
1449 interruptible, deadline,
1450 ^{}, ^{ lck_spin_unlock(lock); });
1451 } else {
1452 return sleep_with_inheritor_and_turnstile(event, inheritor,
1453 interruptible, deadline,
1454 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1455 }
1456 }
1457
1458 wait_result_t
hw_lck_ticket_sleep_with_inheritor(hw_lck_ticket_t * lock,lck_grp_t * grp __unused,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1459 hw_lck_ticket_sleep_with_inheritor(
1460 hw_lck_ticket_t *lock,
1461 lck_grp_t *grp __unused,
1462 lck_sleep_action_t lck_sleep_action,
1463 event_t event,
1464 thread_t inheritor,
1465 wait_interrupt_t interruptible,
1466 uint64_t deadline)
1467 {
1468 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1469 return sleep_with_inheritor_and_turnstile(event, inheritor,
1470 interruptible, deadline,
1471 ^{}, ^{ hw_lck_ticket_unlock(lock); });
1472 } else {
1473 return sleep_with_inheritor_and_turnstile(event, inheritor,
1474 interruptible, deadline,
1475 ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
1476 }
1477 }
1478
1479 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1480 lck_ticket_sleep_with_inheritor(
1481 lck_ticket_t *lock,
1482 lck_grp_t *grp,
1483 lck_sleep_action_t lck_sleep_action,
1484 event_t event,
1485 thread_t inheritor,
1486 wait_interrupt_t interruptible,
1487 uint64_t deadline)
1488 {
1489 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1490 return sleep_with_inheritor_and_turnstile(event, inheritor,
1491 interruptible, deadline,
1492 ^{}, ^{ lck_ticket_unlock(lock); });
1493 } else {
1494 return sleep_with_inheritor_and_turnstile(event, inheritor,
1495 interruptible, deadline,
1496 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1497 }
1498 }
1499
1500 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1501 lck_mtx_sleep_with_inheritor(
1502 lck_mtx_t *lock,
1503 lck_sleep_action_t lck_sleep_action,
1504 event_t event,
1505 thread_t inheritor,
1506 wait_interrupt_t interruptible,
1507 uint64_t deadline)
1508 {
1509 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1510
1511 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1512 return sleep_with_inheritor_and_turnstile(event,
1513 inheritor,
1514 interruptible,
1515 deadline,
1516 ^{;},
1517 ^{lck_mtx_unlock(lock);});
1518 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1519 return sleep_with_inheritor_and_turnstile(event,
1520 inheritor,
1521 interruptible,
1522 deadline,
1523 ^{lck_mtx_lock_spin(lock);},
1524 ^{lck_mtx_unlock(lock);});
1525 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1526 return sleep_with_inheritor_and_turnstile(event,
1527 inheritor,
1528 interruptible,
1529 deadline,
1530 ^{lck_mtx_lock_spin_always(lock);},
1531 ^{lck_mtx_unlock(lock);});
1532 } else {
1533 return sleep_with_inheritor_and_turnstile(event,
1534 inheritor,
1535 interruptible,
1536 deadline,
1537 ^{lck_mtx_lock(lock);},
1538 ^{lck_mtx_unlock(lock);});
1539 }
1540 }
1541
1542 /*
1543 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1544 */
1545
1546 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1547 lck_rw_sleep_with_inheritor(
1548 lck_rw_t *lock,
1549 lck_sleep_action_t lck_sleep_action,
1550 event_t event,
1551 thread_t inheritor,
1552 wait_interrupt_t interruptible,
1553 uint64_t deadline)
1554 {
1555 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1556
1557 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1558
1559 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1560 return sleep_with_inheritor_and_turnstile(event,
1561 inheritor,
1562 interruptible,
1563 deadline,
1564 ^{;},
1565 ^{lck_rw_type = lck_rw_done(lock);});
1566 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1567 return sleep_with_inheritor_and_turnstile(event,
1568 inheritor,
1569 interruptible,
1570 deadline,
1571 ^{lck_rw_lock(lock, lck_rw_type);},
1572 ^{lck_rw_type = lck_rw_done(lock);});
1573 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1574 return sleep_with_inheritor_and_turnstile(event,
1575 inheritor,
1576 interruptible,
1577 deadline,
1578 ^{lck_rw_lock_exclusive(lock);},
1579 ^{lck_rw_type = lck_rw_done(lock);});
1580 } else {
1581 return sleep_with_inheritor_and_turnstile(event,
1582 inheritor,
1583 interruptible,
1584 deadline,
1585 ^{lck_rw_lock_shared(lock);},
1586 ^{lck_rw_type = lck_rw_done(lock);});
1587 }
1588 }
1589
1590 /*
1591 * wakeup_with_inheritor functions are independent from the locking primitive.
1592 */
1593
1594 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1595 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1596 {
1597 return wakeup_with_inheritor_and_turnstile(event,
1598 result,
1599 TRUE,
1600 action,
1601 thread_wokenup);
1602 }
1603
1604 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1605 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1606 {
1607 return wakeup_with_inheritor_and_turnstile(event,
1608 result,
1609 FALSE,
1610 0,
1611 NULL);
1612 }
1613
1614 void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq,__unused event64_t event,thread_waitinfo_t * waitinfo)1615 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
1616 {
1617 assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
1618 assert(waitq_type(waitq) == WQT_TURNSTILE);
1619 waitinfo->owner = 0;
1620 waitinfo->context = 0;
1621
1622 if (waitq_held(waitq)) {
1623 return;
1624 }
1625
1626 struct turnstile *turnstile = waitq_to_turnstile(waitq);
1627 assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
1628 waitinfo->owner = thread_tid(turnstile->ts_inheritor);
1629 }
1630
1631 static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
1632 static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
1633 static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1634
/*
 * Routine: cond_sleep_with_inheritor_and_turnstile_type
 *
 * Core of the cond_sleep_with_inheritor KPI: under the turnstile
 * hash-bucket interlock, evaluate `cond_sleep_check`; if it approves the
 * sleep it yields the ctid of the thread that should inherit the push,
 * and this thread then blocks on `cond` pushing on that inheritor.
 *
 * Returns THREAD_NOT_WAITING if the check rejected the sleep, otherwise
 * the wait result from waitq_assert_wait64()/thread_block().
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		/* condition changed under us: do not sleep */
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	/* let the debugger attribute this wait to sleep_with_inheritor */
	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1701
1702 /*
1703 * Name: cond_sleep_with_inheritor32_mask
1704 *
1705 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1706 * Allows a thread to conditionally sleep while indicating which thread should
1707 * inherit the priority push associated with the condition.
1708 * The condition should be expressed through a cond_swi_var32_s pointer.
1709 * The condition needs to be populated by the caller with the ctid of the
1710 * thread that should inherit the push. The remaining bits of the condition
1711 * can be used by the caller to implement its own synchronization logic.
1712 * A copy of the condition value observed by the caller when it decided to call
1713 * this function should be provided to prevent races with matching wakeups.
1714 * This function will atomically check the value stored in the condition against
1715 * the expected/observed one provided only for the bits that are set in the mask.
1716 * If the check doesn't pass the thread will not sleep and the function will return.
1717 * The ctid provided in the condition will be used only after a successful
1718 * check.
1719 *
1720 * Args:
1721 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1722 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1723 * Arg3: mask to apply to the condition to check.
1724 * Arg4: interruptible flag for wait.
1725 * Arg5: deadline for wait.
1726 *
1727 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1728 * wakeup for the cond is called.
1729 *
1730 * Returns: result of the wait.
1731 */
1732 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1733 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1734 {
1735 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1736 cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1737 bool ret;
1738 if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1739 ret = true;
1740 *ctid = cond_val.cond32_owner;
1741 } else {
1742 ret = false;
1743 }
1744 return ret;
1745 };
1746
1747 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1748 }
1749
1750 /*
1751 * Name: cond_sleep_with_inheritor64_mask
1752 *
1753 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1754 * Allows a thread to conditionally sleep while indicating which thread should
1755 * inherit the priority push associated with the condition.
1756 * The condition should be expressed through a cond_swi_var64_s pointer.
1757 * The condition needs to be populated by the caller with the ctid of the
1758 * thread that should inherit the push. The remaining bits of the condition
1759 * can be used by the caller to implement its own synchronization logic.
1760 * A copy of the condition value observed by the caller when it decided to call
1761 * this function should be provided to prevent races with matching wakeups.
1762 * This function will atomically check the value stored in the condition against
1763 * the expected/observed one provided only for the bits that are set in the mask.
1764 * If the check doesn't pass the thread will not sleep and the function will return.
1765 * The ctid provided in the condition will be used only after a successful
1766 * check.
1767 *
1768 * Args:
1769 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1770 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1771 * Arg3: mask to apply to the condition to check.
1772 * Arg4: interruptible flag for wait.
1773 * Arg5: deadline for wait.
1774 *
1775 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1776 * wakeup for the cond is called.
1777 *
1778 * Returns: result of the wait.
1779 */
1780 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1781 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1782 {
1783 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1784 cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1785 bool ret;
1786 if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1787 ret = true;
1788 *ctid = cond_val.cond64_owner;
1789 } else {
1790 ret = false;
1791 }
1792 return ret;
1793 };
1794
1795 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1796 }
1797
1798 /*
1799 * Name: cond_sleep_with_inheritor32
1800 *
1801 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1802 * Allows a thread to conditionally sleep while indicating which thread should
1803 * inherit the priority push associated with the condition.
1804 * The condition should be expressed through a cond_swi_var32_s pointer.
1805 * The condition needs to be populated by the caller with the ctid of the
1806 * thread that should inherit the push. The remaining bits of the condition
1807 * can be used by the caller to implement its own synchronization logic.
1808 * A copy of the condition value observed by the caller when it decided to call
1809 * this function should be provided to prevent races with matching wakeups.
1810 * This function will atomically check the value stored in the condition against
1811 * the expected/observed one provided. If the check doesn't pass the thread will not
1812 * sleep and the function will return.
1813 * The ctid provided in the condition will be used only after a successful
1814 * check.
1815 *
1816 * Args:
1817 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1818 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1819 * Arg3: interruptible flag for wait.
1820 * Arg4: deadline for wait.
1821 *
1822 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1823 * wakeup for the cond is called.
1824 *
1825 * Returns: result of the wait.
1826 */
wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* full-word comparison: sleep only if every bit still matches */
	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
}
1832
1833 /*
1834 * Name: cond_sleep_with_inheritor64
1835 *
1836 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1837 * Allows a thread to conditionally sleep while indicating which thread should
1838 * inherit the priority push associated with the condition.
1839 * The condition should be expressed through a cond_swi_var64_s pointer.
1840 * The condition needs to be populated by the caller with the ctid of the
1841 * thread that should inherit the push. The remaining bits of the condition
1842 * can be used by the caller to implement its own synchronization logic.
1843 * A copy of the condition value observed by the caller when it decided to call
1844 * this function should be provided to prevent races with matching wakeups.
1845 * This function will atomically check the value stored in the condition against
1846 * the expected/observed one provided. If the check doesn't pass the thread will not
1847 * sleep and the function will return.
1848 * The ctid provided in the condition will be used only after a successful
1849 * check.
1850 *
1851 * Args:
1852 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1853 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1854 * Arg3: interruptible flag for wait.
1855 * Arg4: deadline for wait.
1856 *
1857 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1858 * wakeup for the cond is called.
1859 *
1860 * Returns: result of the wait.
1861 */
wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* full-word comparison: sleep only if every bit still matches */
	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
}
1867
1868 /*
1869 * Name: cond_wakeup_one_with_inheritor
1870 *
1871 * Description: Wake up one waiter waiting on the condition (if any).
1872 * The thread woken up will be the one with the higher sched priority waiting on the condition.
1873 * The push for the condition will be transferred from the last inheritor to the woken up thread.
1874 *
1875 * Args:
1876 * Arg1: condition to wake from.
1877 * Arg2: wait result to pass to the woken up thread.
 * Arg3: pointer for storing the woken-up thread.
1879 *
1880 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1881 *
 * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
 *             condition or a wakeup for the event is called.
 *             A reference for the woken-up thread is acquired.
1885 * NOTE: this cannot be called from interrupt context.
1886 */
1887 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1888 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1889 {
1890 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1891 result,
1892 TRUE,
1893 action,
1894 thread_wokenup);
1895 }
1896
1897 /*
1898 * Name: cond_wakeup_all_with_inheritor
1899 *
1900 * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1901 *
1902 * Args:
1903 * Arg1: condition to wake from.
1904 * Arg2: wait result to pass to the woken up threads.
1905 *
1906 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1907 *
1908 * Conditions: NOTE: this cannot be called from interrupt context.
1909 */
1910 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1911 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1912 {
1913 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1914 result,
1915 FALSE,
1916 0,
1917 NULL);
1918 }
1919
1920
#pragma mark - gates

/*
 * Gate state is packed into the single word gt_data:
 * bit 0 is the hardware interlock bit, bit 1 records the presence of
 * waiters, and the remaining (pointer-aligned) bits hold the holder
 * thread pointer, or 0 when the gate is open.
 */
#define GATE_TYPE 3
#define GATE_ILOCK_BIT 0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Interlock is taken/released by spinning on the low bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* Strip the two flag bits to recover the holder thread (or the destroy tag). */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel stored in gt_data (flag bits aside) once the gate is destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

/* The gate address doubles as the wait event. */
#define GATE_EVENT(gate) ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred-work thunk returned by the turnstile helpers below. */
typedef void (*void_func_void)(void);
1947
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	/* gt_type did not match GATE_TYPE: the pointer is not a (valid) gate. */
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1954
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	/* The gate state carries the GATE_DESTROYED sentinel: use after destroy. */
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1961
1962 static void
gate_verify(gate_t * gate)1963 gate_verify(gate_t *gate)
1964 {
1965 if (gate->gt_type != GATE_TYPE) {
1966 gate_verify_tag_panic(gate);
1967 }
1968 if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
1969 gate_verify_destroy_panic(gate);
1970 }
1971
1972 assert(gate->gt_refs > 0);
1973 }
1974
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	/* Closing (or try-closing) a gate the caller already holds is a bug. */
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
1981
1982 static kern_return_t
gate_try_close(gate_t * gate)1983 gate_try_close(gate_t *gate)
1984 {
1985 uintptr_t state;
1986 thread_t holder;
1987 kern_return_t ret;
1988 thread_t thread = current_thread();
1989
1990 gate_verify(gate);
1991
1992 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
1993 return KERN_SUCCESS;
1994 }
1995
1996 gate_ilock(gate);
1997 state = ordered_load_gate(gate);
1998 holder = GATE_STATE_TO_THREAD(state);
1999
2000 if (holder == NULL) {
2001 assert(gate_has_waiter_bit(state) == FALSE);
2002
2003 state = GATE_THREAD_TO_STATE(current_thread());
2004 state |= GATE_ILOCK;
2005 ordered_store_gate(gate, state);
2006 ret = KERN_SUCCESS;
2007 } else {
2008 if (holder == current_thread()) {
2009 gate_already_owned_panic(gate, holder);
2010 }
2011 ret = KERN_FAILURE;
2012 }
2013
2014 gate_iunlock(gate);
2015 return ret;
2016 }
2017
2018 static void
gate_close(gate_t * gate)2019 gate_close(gate_t* gate)
2020 {
2021 uintptr_t state;
2022 thread_t holder;
2023 thread_t thread = current_thread();
2024
2025 gate_verify(gate);
2026
2027 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2028 return;
2029 }
2030
2031 gate_ilock(gate);
2032 state = ordered_load_gate(gate);
2033 holder = GATE_STATE_TO_THREAD(state);
2034
2035 if (holder != NULL) {
2036 gate_already_owned_panic(gate, holder);
2037 }
2038
2039 assert(gate_has_waiter_bit(state) == FALSE);
2040
2041 state = GATE_THREAD_TO_STATE(thread);
2042 state |= GATE_ILOCK;
2043 ordered_store_gate(gate, state);
2044
2045 gate_iunlock(gate);
2046 }
2047
/*
 * Wake up every thread sleeping on the gate's turnstile and clear the
 * turnstile inheritance. Called from gate_open() with the gate
 * interlock held.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/* WAITQ_UPDATE_INHERITOR resets the inheritor as part of the wakeup. */
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2068
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	/* open == true: called from the open path; false: from the handoff path. */
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2079
2080 static void
gate_open(gate_t * gate)2081 gate_open(gate_t *gate)
2082 {
2083 uintptr_t state;
2084 thread_t holder;
2085 bool waiters;
2086 thread_t thread = current_thread();
2087
2088 gate_verify(gate);
2089 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2090 return;
2091 }
2092
2093 gate_ilock(gate);
2094 state = ordered_load_gate(gate);
2095 holder = GATE_STATE_TO_THREAD(state);
2096 waiters = gate_has_waiter_bit(state);
2097
2098 if (holder != thread) {
2099 gate_not_owned_panic(gate, holder, true);
2100 }
2101
2102 if (waiters) {
2103 gate_open_turnstile(gate);
2104 }
2105
2106 state = GATE_ILOCK;
2107 ordered_store_gate(gate, state);
2108
2109 gate_iunlock(gate);
2110 }
2111
/*
 * Wake the highest-priority thread waiting on the gate and make it the
 * new turnstile inheritor. Called with the gate interlock held.
 *
 * On KERN_SUCCESS, *thread_woken_up holds a reference on the new holder
 * (caller must thread_deallocate it) and *waiters reports whether more
 * waiters remain on the turnstile. Returns KERN_NOT_WAITING if no
 * thread was found on the waitq.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2175
2176 static kern_return_t
gate_handoff(gate_t * gate,int flags)2177 gate_handoff(gate_t *gate,
2178 int flags)
2179 {
2180 kern_return_t ret;
2181 thread_t new_holder = NULL;
2182 uintptr_t state;
2183 thread_t holder;
2184 bool waiters;
2185 thread_t thread = current_thread();
2186
2187 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2188 gate_verify(gate);
2189
2190 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2191 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2192 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2193 return KERN_NOT_WAITING;
2194 }
2195 }
2196
2197 gate_ilock(gate);
2198 state = ordered_load_gate(gate);
2199 holder = GATE_STATE_TO_THREAD(state);
2200 waiters = gate_has_waiter_bit(state);
2201
2202 if (holder != current_thread()) {
2203 gate_not_owned_panic(gate, holder, false);
2204 }
2205
2206 if (waiters) {
2207 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2208 if (ret == KERN_SUCCESS) {
2209 state = GATE_THREAD_TO_STATE(new_holder);
2210 if (waiters) {
2211 state |= GATE_WAITERS;
2212 }
2213 } else {
2214 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2215 state = 0;
2216 }
2217 }
2218 } else {
2219 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2220 state = 0;
2221 }
2222 ret = KERN_NOT_WAITING;
2223 }
2224 state |= GATE_ILOCK;
2225 ordered_store_gate(gate, state);
2226
2227 gate_iunlock(gate);
2228
2229 if (new_holder) {
2230 thread_deallocate(new_holder);
2231 }
2232 return ret;
2233 }
2234
/*
 * Transfer the turnstile inheritance of the gate to new_inheritor
 * (the thread that is stealing the gate). Called with the gate
 * interlock held.
 *
 * Returns turnstile_cleanup, which the caller must invoke after
 * dropping the interlock.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2253
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	/* wait == true: called from the wait path; false: from the steal path. */
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2264
/*
 * Forcibly take ownership of a closed gate: the current thread replaces
 * the previous holder. Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/*
		 * The thief becomes the new turnstile inheritor. The state is
		 * stored before the turnstile update; the returned cleanup must
		 * run only after the interlock is dropped.
		 */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2299
/*
 * Sleep on the gate's turnstile, pushing on the current holder.
 *
 * Entered with the gate interlock held; the interlock is dropped
 * across the block and re-taken before returning. On return, *wait
 * holds the thread_block() result and *waiters reports whether the
 * turnstile still has waiters.
 *
 * Returns turnstile_cleanup, to be called by the caller once the
 * gate interlock has been dropped.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) || // thread interrupted or timedout
		    holder == current_thread())) { // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2353
/* Return the gate storage to its zone. */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2359
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	/* gt_refs is 16 bits wide; the refcount would overflow. */
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2366
/*
 * Common wait path for all gate flavors.
 *
 * Takes a reference on the gate, releases the protecting primitive via
 * primitive_unlock, sleeps on the gate's turnstile pushing on the
 * current holder, then re-acquires the primitive via primitive_lock.
 *
 * Returns: GATE_HANDOFF if this thread was made the new holder,
 *          GATE_OPENED if the gate was opened (or, for allocated gates,
 *          destroyed while we slept), GATE_INTERRUPTED / GATE_TIMED_OUT
 *          on early wakeups.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/* Waiting on an open gate is a caller bug. */
	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	/* Returns with the gate interlock re-taken. */
	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Keep the waiter bit accurate for the remaining sleepers. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken above; remember if we might have to free the gate. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate may have been destroyed (gate_free) while we slept. */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			if (ref == 0) {
				/* We were the last reference: free the storage. */
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2490
2491 static void
gate_assert(gate_t * gate,int flags)2492 gate_assert(gate_t *gate, int flags)
2493 {
2494 uintptr_t state;
2495 thread_t holder;
2496
2497 gate_verify(gate);
2498
2499 gate_ilock(gate);
2500 state = ordered_load_gate(gate);
2501 holder = GATE_STATE_TO_THREAD(state);
2502
2503 switch (flags) {
2504 case GATE_ASSERT_CLOSED:
2505 assert(holder != NULL);
2506 break;
2507 case GATE_ASSERT_OPEN:
2508 assert(holder == NULL);
2509 break;
2510 case GATE_ASSERT_HELD:
2511 assert(holder == current_thread());
2512 break;
2513 default:
2514 panic("invalid %s flag %d", __func__, flags);
2515 }
2516
2517 gate_iunlock(gate);
2518 }
2519
/* Initialization modes for gate_init(). */
enum {
	GT_INIT_DEFAULT = 0,    /* gate lives in caller-owned storage */
	GT_INIT_ALLOC           /* gate was allocated by gate_alloc_init() */
};
2524
2525 static void
gate_init(gate_t * gate,uint type)2526 gate_init(gate_t *gate, uint type)
2527 {
2528 bzero(gate, sizeof(gate_t));
2529
2530 gate->gt_data = 0;
2531 gate->gt_turnstile = NULL;
2532 gate->gt_refs = 1;
2533 switch (type) {
2534 case GT_INIT_ALLOC:
2535 gate->gt_alloc = 1;
2536 break;
2537 default:
2538 gate->gt_alloc = 0;
2539 break;
2540 }
2541 gate->gt_type = GATE_TYPE;
2542 gate->gt_flags_pad = 0;
2543 }
2544
2545 static gate_t*
gate_alloc_init(void)2546 gate_alloc_init(void)
2547 {
2548 gate_t *gate;
2549 gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2550 gate_init(gate, GT_INIT_ALLOC);
2551 return gate;
2552 }
2553
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	/* A gate must be open (no holder) before it can be destroyed. */
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2560
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	/* All waiters must have been woken before a gate is destroyed. */
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2567
/*
 * Mark the gate as destroyed and drop the caller's reference.
 *
 * The gate must be open (no holder) with no waiters. Returns the
 * number of references still outstanding: if non-zero, threads
 * returning from gate_wait() still need to touch the gate, so the
 * storage must not be freed yet.
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still need
	 * to be available to let the
	 * last wokenup threads to clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2608
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	/* Zone-allocated gates must be released with gate_free(), not gate_destroy(). */
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2615
2616 static void
gate_destroy(gate_t * gate)2617 gate_destroy(gate_t *gate)
2618 {
2619 gate_verify(gate);
2620 if (gate->gt_alloc == 1) {
2621 gate_destroy_panic(gate);
2622 }
2623 gate_destroy_internal(gate);
2624 }
2625
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	/* Embedded gates must be released with gate_destroy(), not gate_free(). */
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2632
2633 static void
gate_free(gate_t * gate)2634 gate_free(gate_t *gate)
2635 {
2636 uint16_t ref;
2637
2638 gate_verify(gate);
2639
2640 if (gate->gt_alloc == 0) {
2641 gate_free_panic(gate);
2642 }
2643
2644 ref = gate_destroy_internal(gate);
2645 /*
2646 * Some of the threads waiting on the gate
2647 * might still need to run after being woken up.
2648 * They will access the gate to cleanup the
2649 * state, so we cannot free it.
2650 * The last waiter will free the gate in this case.
2651 */
2652 if (ref == 0) {
2653 gate_free_internal(gate);
2654 }
2655 }
2656
2657 /*
2658 * Name: lck_rw_gate_init
2659 *
2660 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2661 *
2662 * Args:
2663 * Arg1: lck_rw_t lock used to protect the gate.
2664 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2665 */
void
lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
{
	(void) lock;    /* the protecting lock is not recorded in the gate */
	gate_init(gate, GT_INIT_DEFAULT);
}
2672
2673 /*
2674 * Name: lck_rw_gate_alloc_init
2675 *
2676 * Description: allocates and initializes a gate_t.
2677 *
2678 * Args:
2679 * Arg1: lck_rw_t lock used to protect the gate.
2680 *
2681 * Returns:
2682 * gate_t allocated.
2683 */
gate_t*
lck_rw_gate_alloc_init(lck_rw_t *lock)
{
	(void) lock;    /* the protecting lock is not recorded in the gate */
	return gate_alloc_init();
}
2690
2691 /*
2692 * Name: lck_rw_gate_destroy
2693 *
2694 * Description: destroys a variable previously initialized
2695 * with lck_rw_gate_init().
2696 *
2697 * Args:
2698 * Arg1: lck_rw_t lock used to protect the gate.
2699 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2700 */
void
lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
{
	(void) lock;    /* the protecting lock is not recorded in the gate */
	gate_destroy(gate);
}
2707
2708 /*
2709 * Name: lck_rw_gate_free
2710 *
2711 * Description: destroys and tries to free a gate previously allocated
2712 * with lck_rw_gate_alloc_init().
2713 * The gate free might be delegated to the last thread returning
2714 * from the gate_wait().
2715 *
2716 * Args:
2717 * Arg1: lck_rw_t lock used to protect the gate.
2718 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2719 */
void
lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
{
	(void) lock;    /* the protecting lock is not recorded in the gate */
	gate_free(gate);
}
2726
2727 /*
2728 * Name: lck_rw_gate_try_close
2729 *
2730 * Description: Tries to close the gate.
2731 * In case of success the current thread will be set as
2732 * the holder of the gate.
2733 *
2734 * Args:
2735 * Arg1: lck_rw_t lock used to protect the gate.
2736 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2737 *
2738 * Conditions: Lock must be held. Returns with the lock held.
2739 *
2740 * Returns:
2741 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2742 * of the gate.
2743 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2744 * to wake up possible waiters on the gate before returning to userspace.
2745 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2746 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2747 *
2748 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2749 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2750 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2751 * be done without dropping the lock that is protecting the gate in between.
2752 */
2753 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2754 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2755 {
2756 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2757
2758 return gate_try_close(gate);
2759 }
2760
2761 /*
2762 * Name: lck_rw_gate_close
2763 *
2764 * Description: Closes the gate. The current thread will be set as
2765 * the holder of the gate. Will panic if the gate is already closed.
2766 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2767 * to wake up possible waiters on the gate before returning to userspace.
2768 *
2769 * Args:
2770 * Arg1: lck_rw_t lock used to protect the gate.
2771 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2772 *
2773 * Conditions: Lock must be held. Returns with the lock held.
2774 * The gate must be open.
2775 *
2776 */
2777 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2778 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2779 {
2780 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2781
2782 return gate_close(gate);
2783 }
2784
2785 /*
2786 * Name: lck_rw_gate_open
2787 *
2788 * Description: Opens the gate and wakes up possible waiters.
2789 *
2790 * Args:
2791 * Arg1: lck_rw_t lock used to protect the gate.
2792 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2793 *
2794 * Conditions: Lock must be held. Returns with the lock held.
2795 * The current thread must be the holder of the gate.
2796 *
2797 */
void
lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
{
	/* The lock protecting the gate must be held across the open. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_open(gate);
}
2805
2806 /*
2807 * Name: lck_rw_gate_handoff
2808 *
2809 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2810 * priority will be selected as the new holder of the gate, and woken up,
2811 * with the gate remaining in the closed state throughout.
2812 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2813 * will be returned.
2814 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2815 * case no waiters were found.
2816 *
2817 *
2818 * Args:
2819 * Arg1: lck_rw_t lock used to protect the gate.
2820 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2821 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2822 *
2823 * Conditions: Lock must be held. Returns with the lock held.
2824 * The current thread must be the holder of the gate.
2825 *
2826 * Returns:
2827 * KERN_SUCCESS in case one of the waiters became the new holder.
2828 * KERN_NOT_WAITING in case there were no waiters.
2829 *
2830 */
2831 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2832 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2833 {
2834 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2835
2836 return gate_handoff(gate, flags);
2837 }
2838
2839 /*
2840 * Name: lck_rw_gate_steal
2841 *
2842 * Description: Set the current ownership of the gate. It sets the current thread as the
2843 * new holder of the gate.
2844 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2845 * to wake up possible waiters on the gate before returning to userspace.
2846 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2847 * anymore.
2848 *
2849 *
2850 * Args:
2851 * Arg1: lck_rw_t lock used to protect the gate.
2852 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2853 *
2854 * Conditions: Lock must be held. Returns with the lock held.
2855 * The gate must be closed and the current thread must not already be the holder.
2856 *
2857 */
void
lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
{
	/* The lock protecting the gate must be held across the steal. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_steal(gate);
}
2865
2866 /*
2867 * Name: lck_rw_gate_wait
2868 *
2869 * Description: Waits for the current thread to become the holder of the gate or for the
2870 * gate to become open. An interruptible mode and deadline can be specified
2871 * to return earlier from the wait.
2872 *
2873 * Args:
2874 * Arg1: lck_rw_t lock used to protect the gate.
2875 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
2879 *
2880 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2881 * Lock will be dropped while waiting.
2882 * The gate must be closed.
2883 *
2884 * Returns: Reason why the thread was woken up.
2885 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 *                A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *                to wake up possible waiters on the gate before returning to userspace.
2888 * GATE_OPENED - the gate was opened by the holder.
2889 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2890 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2891 */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * Mode in which the lock will be re-taken after the wait.
	 * lck_rw_done() in the "before sleep" block overwrites this with the
	 * mode that was actually dropped; __block so the block may write it.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * Each branch passes gate_wait() a "drop the lock" block and a
	 * "re-acquire after wakeup" block. NOTE(review): branch order matters —
	 * LCK_SLEEP_UNLOCK takes priority over the SHARED/EXCLUSIVE bits.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-take. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* No explicit mode requested: re-take in the mode that was dropped. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		/* Re-take exclusive regardless of the mode that was dropped. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		/* LCK_SLEEP_SHARED: re-take shared. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2925
2926 /*
2927 * Name: lck_rw_gate_assert
2928 *
2929 * Description: asserts that the gate is in the specified state.
2930 *
2931 * Args:
2932 * Arg1: lck_rw_t lock used to protect the gate.
2933 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 * Arg3: flags to specify the assert type.
2935 * GATE_ASSERT_CLOSED - the gate is currently closed
2936 * GATE_ASSERT_OPEN - the gate is currently opened
2937 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2938 */
2939 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2940 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2941 {
2942 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2943
2944 gate_assert(gate, flags);
2945 return;
2946 }
2947
2948 /*
2949 * Name: lck_mtx_gate_init
2950 *
2951 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2952 *
2953 * Args:
2954 * Arg1: lck_mtx_t lock used to protect the gate.
2955 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2956 */
void
lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
{
	/* The protecting mutex is not needed for initialization itself. */
	(void) lock;
	gate_init(gate, GT_INIT_DEFAULT);
}
2963
2964 /*
2965 * Name: lck_mtx_gate_alloc_init
2966 *
2967 * Description: allocates and initializes a gate_t.
2968 *
2969 * Args:
2970 * Arg1: lck_mtx_t lock used to protect the gate.
2971 *
2972 * Returns:
2973 * gate_t allocated.
2974 */
gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t *lock)
{
	/* The protecting mutex is not needed for allocation itself. */
	(void) lock;
	/* Ownership of the returned gate passes to the caller (see lck_mtx_gate_free()). */
	return gate_alloc_init();
}
2981
2982 /*
2983 * Name: lck_mtx_gate_destroy
2984 *
2985 * Description: destroys a variable previously initialized
2986 * with lck_mtx_gate_init().
2987 *
2988 * Args:
2989 * Arg1: lck_mtx_t lock used to protect the gate.
2990 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2991 */
void
lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
{
	/* The protecting mutex is not needed to tear the gate down. */
	(void) lock;
	gate_destroy(gate);
}
2998
2999 /*
3000 * Name: lck_mtx_gate_free
3001 *
3002 * Description: destroys and tries to free a gate previously allocated
3003 * with lck_mtx_gate_alloc_init().
3004 * The gate free might be delegated to the last thread returning
3005 * from the gate_wait().
3006 *
3007 * Args:
3008 * Arg1: lck_mtx_t lock used to protect the gate.
 * Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3010 */
void
lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
{
	/* The protecting mutex is not needed to free the gate. */
	(void) lock;
	/*
	 * Per the header comment above, the actual free may be deferred to the
	 * last thread returning from gate_wait().
	 */
	gate_free(gate);
}
3017
3018 /*
3019 * Name: lck_mtx_gate_try_close
3020 *
3021 * Description: Tries to close the gate.
3022 * In case of success the current thread will be set as
3023 * the holder of the gate.
3024 *
3025 * Args:
3026 * Arg1: lck_mtx_t lock used to protect the gate.
3027 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3028 *
3029 * Conditions: Lock must be held. Returns with the lock held.
3030 *
3031 * Returns:
3032 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3033 * of the gate.
3034 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3035 * to wake up possible waiters on the gate before returning to userspace.
3036 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3037 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3038 *
3039 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3040 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3041 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3042 * be done without dropping the lock that is protecting the gate in between.
3043 */
int
lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	/* 'lock' is consumed only by the owned-mutex assertion below. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/* Returns KERN_SUCCESS / KERN_FAILURE (see header comment above). */
	return gate_try_close(gate);
}
3051
3052 /*
3053 * Name: lck_mtx_gate_close
3054 *
3055 * Description: Closes the gate. The current thread will be set as
3056 * the holder of the gate. Will panic if the gate is already closed.
3057 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3058 * to wake up possible waiters on the gate before returning to userspace.
3059 *
3060 * Args:
3061 * Arg1: lck_mtx_t lock used to protect the gate.
3062 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3063 *
3064 * Conditions: Lock must be held. Returns with the lock held.
3065 * The gate must be open.
3066 *
3067 */
3068 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3069 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3070 {
3071 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3072
3073 return gate_close(gate);
3074 }
3075
3076 /*
3077 * Name: lck_mtx_gate_open
3078 *
 * Description: Opens the gate and wakes up possible waiters.
3080 *
3081 * Args:
3082 * Arg1: lck_mtx_t lock used to protect the gate.
3083 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3084 *
3085 * Conditions: Lock must be held. Returns with the lock held.
3086 * The current thread must be the holder of the gate.
3087 *
3088 */
void
lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	/* 'lock' is consumed only by the owned-mutex assertion below. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/* Delegate to the lock-type-independent gate implementation. */
	gate_open(gate);
}
3096
3097 /*
3098 * Name: lck_mtx_gate_handoff
3099 *
3100 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3101 * priority will be selected as the new holder of the gate, and woken up,
3102 * with the gate remaining in the closed state throughout.
3103 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3104 * will be returned.
3105 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3106 * case no waiters were found.
3107 *
3108 *
3109 * Args:
3110 * Arg1: lck_mtx_t lock used to protect the gate.
3111 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3112 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3113 *
3114 * Conditions: Lock must be held. Returns with the lock held.
3115 * The current thread must be the holder of the gate.
3116 *
3117 * Returns:
3118 * KERN_SUCCESS in case one of the waiters became the new holder.
3119 * KERN_NOT_WAITING in case there were no waiters.
3120 *
3121 */
kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
{
	/* 'lock' is consumed only by the owned-mutex assertion below. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/* Delegate to the lock-type-independent gate implementation. */
	return gate_handoff(gate, flags);
}
3129
3130 /*
3131 * Name: lck_mtx_gate_steal
3132 *
3133 * Description: Steals the ownership of the gate. It sets the current thread as the
3134 * new holder of the gate.
3135 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3136 * to wake up possible waiters on the gate before returning to userspace.
3137 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3138 * anymore.
3139 *
3140 *
3141 * Args:
3142 * Arg1: lck_mtx_t lock used to protect the gate.
3143 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3144 *
3145 * Conditions: Lock must be held. Returns with the lock held.
3146 * The gate must be closed and the current thread must not already be the holder.
3147 *
3148 */
void
lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	/* 'lock' is consumed only by the owned-mutex assertion below. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/* Delegate to the lock-type-independent gate implementation. */
	gate_steal(gate);
}
3156
3157 /*
3158 * Name: lck_mtx_gate_wait
3159 *
3160 * Description: Waits for the current thread to become the holder of the gate or for the
3161 * gate to become open. An interruptible mode and deadline can be specified
3162 * to return earlier from the wait.
3163 *
3164 * Args:
3165 * Arg1: lck_mtx_t lock used to protect the gate.
3166 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
3170 *
3171 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3172 * Lock will be dropped while waiting.
3173 * The gate must be closed.
3174 *
3175 * Returns: Reason why the thread was woken up.
3176 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3177 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3178 * to wake up possible waiters on the gate before returning to userspace.
3179 * GATE_OPENED - the gate was opened by the holder.
3180 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3181 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3182 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Each branch passes gate_wait() a "drop the mutex" block and a
	 * "re-acquire after wakeup" block. NOTE(review): branch order matters —
	 * LCK_SLEEP_UNLOCK takes priority over the SPIN variants.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-take. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		/* Re-take the mutex in spin mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		/* Re-take the mutex in spin-always mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		/* LCK_SLEEP_DEFAULT: re-take as a regular blocking mutex. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3214
3215 /*
3216 * Name: lck_mtx_gate_assert
3217 *
3218 * Description: asserts that the gate is in the specified state.
3219 *
3220 * Args:
3221 * Arg1: lck_mtx_t lock used to protect the gate.
3222 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: flags to specify the assert type.
3224 * GATE_ASSERT_CLOSED - the gate is currently closed
3225 * GATE_ASSERT_OPEN - the gate is currently opened
3226 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3227 */
void
lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
{
	/* 'lock' is consumed only by the owned-mutex assertion below. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/* Delegate to the lock-type-independent gate implementation. */
	gate_assert(gate, flags);
}
3235
3236 #pragma mark - LCK_*_DECLARE support
3237
__startup_func
void
lck_spin_startup_init(struct lck_spin_startup_spec *sp)
{
	/* Startup-time hook: initialize a statically declared spin lock from its spec. */
	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3244
__startup_func
void
lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
{
	/* Startup-time hook: initialize a statically declared mutex from its spec. */
	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3251
__startup_func
void
lck_rw_startup_init(struct lck_rw_startup_spec *sp)
{
	/* Startup-time hook: initialize a statically declared rw lock from its spec. */
	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3258
__startup_func
void
usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
{
	/* Startup-time hook: initialize a statically declared simple lock from its spec. */
	simple_lock_init(sp->lck, sp->lck_init_arg);
}
3265
__startup_func
void
lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
{
	/* Startup-time hook: initialize a statically declared ticket lock from its spec. */
	lck_ticket_init(sp->lck, sp->lck_grp);
}
3272