1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82
83 #include <sys/kdebug.h>
84
85 #define LCK_MTX_SLEEP_CODE 0
86 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
87 #define LCK_MTX_LCK_WAIT_CODE 2
88 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
89
90 // Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
92 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
93
94 #if MACH_LDEBUG
95 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
96 #else
97 #define ALIGN_TEST(p, t) do{}while(0)
98 #endif
99
100 #define NOINLINE __attribute__((noinline))
101
102 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
103 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
104
105 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
106
107 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
108 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
109
110 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
111
112 struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);
113
114 #if CONFIG_PV_TICKET
115 SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
116 #endif
117
118 #if DEBUG
119 TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
120 #else
121 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
122 #endif
123
124 #if CONFIG_DTRACE
125 #if defined (__x86_64__)
126 machine_timeout_t dtrace_spin_threshold = 500; // 500ns
127 #elif defined(__arm64__)
128 MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
129 0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
130 #endif
131 #endif
132
133 struct lck_mcs PERCPU_DATA(lck_mcs);
134
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 if (__improbable(kdebug_enable)) {
140 return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 } else {
142 return 0;
143 }
144 }
145
/*
 * Out-of-line panic helper; keeps the cold panic path out of the
 * inlined fast path of __lck_require_preemption_disabled().
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
151
/*
 * Assert that a "nopreempt" lock operation really runs with preemption
 * disabled for `self`; panics (via the out-of-line helper) otherwise.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
159
160 #pragma mark - HW Spin policies
161
/*
 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
 *
 * Snapshot a policy's timeout (and, on SCHED_HYGIENE_DEBUG kernels, the
 * PPL/interruptibility context) into a hw_spin_timeout_t for one
 * acquisition attempt.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	/* policies may scale their base timeout by a power of two */
	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	/*
	 * When spinning uninterruptibly with the interrupt-masked watchdog
	 * in panic mode, clamp the spin timeout to the interrupt-masked
	 * timeout — presumably so the spin timeout (with its lock
	 * diagnostics) fires before the watchdog does.
	 */
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		/* convert ns to TSC units to match hwst_timeout */
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
200
/*
 * Whether the current spin is happening inside the PPL.
 * Uses the value cached in the timeout snapshot when available,
 * otherwise queries pmap_in_ppl() directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
212
/*
 * Decide whether a contended spin should continue.
 *
 * The deadline is armed lazily: the first call after the initial spin
 * burst (hwss_deadline == 0) records the start time and sets the
 * deadline.  Once the deadline passes, the policy's timeout handler is
 * invoked; it may panic, tell the caller to give up, or ask to keep
 * spinning (in which case the deadline is pushed out by one more
 * timeout interval).
 *
 * Returns true to keep spinning, false to stop.
 */
bool
hw_spin_should_keep_spinning(
	void *lock,
	hw_spin_policy_t pol,
	hw_spin_timeout_t to,
	hw_spin_state_t *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	/* a zero timeout means "spin forever" */
	if (__improbable(to.hwst_timeout == 0)) {
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		/* first pass: arm the deadline and keep going */
		state->hwss_start = now;
		state->hwss_deadline = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	/* hand the enclosing lock (not the spun-on word) to the policy */
	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
267
/*
 * Record (on DEBUG/DEVELOPMENT kernels) the raw owner word observed
 * when a contended spin begins; low flag bits are stripped.  Used by
 * the timeout panic to report the "initial owner".
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
278
/*
 * Same as lck_spinlock_timeout_set_orig_owner(), but for locks that
 * store a compact thread ID (ctid) instead of a thread pointer; the
 * ctid is translated to a thread pointer (unsafely — diagnostic only).
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}
290
/*
 * Publish diagnostic state when a spinlock acquisition has timed out.
 *
 * @param lck	the lock that timed out.
 * @param owner	raw owner word from the lock: either a ctid (when the
 *		value is below 1 << CTID_SIZE_BIT) or a thread pointer
 *		with low flag bits set.
 *
 * Fills the per-cpu lck_spinlock_to_info record (lock, owner thread,
 * owner cpu) so the panic path / debugger can find it, and on x86
 * NMIs the owner's cpu so it panics with its own backtrace.
 *
 * Returns the filled-in per-cpu record.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	/* release so observers see a fully-initialized record */
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	/* scan cpus to find where the owner thread is running, if anywhere */
	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
331
332 #pragma mark - HW locks
333
/*
 * Routine:	hw_lock_init
 *
 * Initialize a hardware lock.
 * Stores 0 (unowned) into the lock word.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}
344
/*
 * One acquisition attempt on a contended hw lock; returns true when
 * `newval` was installed into an unowned (0) lock word.
 *
 * On LL/SC architectures a failed attempt leaves via wait_for_event()
 * so the core sleeps until the exclusive monitor observes a store to
 * the lock word.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	/* load-exclusive first so a failed probe can wait on the monitor */
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
369
/*
 * One attempt to set lock bit `bit` in `*target`; returns true when
 * the bit transitioned 0 -> 1 (lock acquired).
 *
 * @param wait	on failure: true to park via wait_for_event()/cpu_pause()
 *		(caller intends to retry), false to just clear any
 *		exclusive monitor and return.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
		        } else {
		                os_atomic_clear_exclusive();
		        }
#else
		        if (wait) {
		                cpu_pause();
		        }
#endif
		        return false;
		}
	});
	return true;
#else
	/* load-exclusive probe, then a single atomic OR to take the bit */
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
409
/*
 * Timeout handler for hw_lock_t policies that panic on timeout.
 * Reports the current (and on DEBUG/DEVELOPMENT kernels, initial)
 * owner.  If spinlock_timeout_panic is false, asks to keep spinning
 * instead of panicking.
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul;
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
448
/* Default hw_lock_t policy: panic after lock_panic_timeout. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name              = "hw_lock_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_spin_timeout_panic,
};
454
/* Timeout handler that always gives up and returns to the caller. */
static hw_spin_timeout_status_t
hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
#pragma unused(_lock, to, st)
	return HW_LOCK_TIMEOUT_RETURN;
}
461
/*
 * Policy for lock attempts on the panic path: give up (return to the
 * caller) after 4x the lock timeout instead of panicking again.
 */
const struct hw_spin_policy hw_lock_spin_panic_policy = {
	.hwsp_name              = "hw_lock_t[panic]",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};
472
#if DEBUG || DEVELOPMENT
/*
 * Test-only spin policy and its timeout value (initialized to 100us
 * at startup); the handler gives up rather than panicking.
 */
static machine_timeout_t hw_lock_test_to;
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name              = "testing policy",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};

__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	/* hw spin timeouts are expressed in TSC units on Intel */
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
500
/*
 * Timeout handler for bit locks: panics with the current lock word
 * value, unless spinlock_timeout_panic is disabled.
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
516
/* Default bit-lock policy: panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};

#if __arm64__
/* Bit-lock policy with a fixed timeout of 0x3000000 timebase ticks. */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout           = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
#endif
531
/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 *
 *	Returns HW_LOCK_ACQUIRED, or HW_LOCK_CONTENDED if the policy's
 *	timeout handler gave up.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t lock,
	uintptr_t data,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

	/* catch self-deadlock: the contending thread already owns the lock */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	/* spin in bursts of LOCK_SNOOP_SPINS between timeout checks */
	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
593
/*
 * Timeout handler for hw_wait_while_equals32(): panics with the
 * current value of the watched word (or keeps spinning when
 * spinlock_timeout_panic is disabled).
 */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name              = "hw_wait_while_equals32",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals32_panic,
};
615
/*
 * Timeout handler for hw_wait_while_equals64(): 64-bit counterpart of
 * hw_wait_while_equals32_panic().
 */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name              = "hw_wait_while_equals64",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals64_panic,
};
637
/*
 * Spin until *address differs from `current`, panicking on timeout
 * (per hw_wait_while_equals32_policy).  Returns the first observed
 * differing value.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t  pol = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint32_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
652
/*
 * 64-bit counterpart of hw_wait_while_equals32(): spin until *address
 * differs from `current` and return the new value.
 */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t  pol = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint64_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
667
/*
 * Common acquire path for hw_lock_lock()/hw_lock_to() and their
 * nopreempt variants: fast-path trylock, falling back to the
 * contended spin loop governed by `pol`.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t lock,
	thread_t thread,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}
685
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 *
 *	NOTE: the function name is parenthesized so that a same-named
 *	function-like macro does not expand here.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* preemption stays disabled until the matching unlock */
	lock_disable_preemption_for_thread(thread);
	/* default policy panics on timeout, so the result can be ignored */
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
700
/*
 *	Routine: hw_lock_lock_nopreempt
 *
 *	Acquire lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
714
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled (even when acquisition failed).
 *
 *	Returns nonzero (HW_LOCK_ACQUIRED) on success, 0 on timeout.
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
730
/*
 *	Routine: hw_lock_to_nopreempt
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, called and return with
 *	preemption disabled (asserted on entry).
 *
 *	Returns nonzero (HW_LOCK_ACQUIRED) on success, 0 on timeout.
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
746
747 __result_use_check
748 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))749 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
750 {
751 if (__probable(lock_cmpxchg(&lock->lock_data, 0,
752 HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
753 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
754 return true;
755 }
756 return false;
757 }
758
/*
 *	Routine: hw_lock_try
 *
 *	returns with preemption disabled on success.
 *	On failure preemption is restored and 0 is returned.
 */
unsigned
int
(hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
	if (!success) {
		/* didn't take the lock: undo the preemption disable */
		lock_enable_preemption();
	}
	return success;
}
776
/*
 * Non-blocking acquire for callers that already hold preemption
 * disabled (asserted); preemption state is left untouched.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
785
#if DEBUG || DEVELOPMENT
/*
 * Out-of-line panic helper for hw_lock_unlock_internal(): fires when a
 * thread releases a lock whose owner word is not the current thread.
 *
 * The previous message ("trying to lock %p recursively") was a
 * copy-paste of the recursive-lock panic in hw_lock_lock_contended()
 * and misdescribed this failure; this helper is reached from the
 * unlock path, not the lock path.
 */
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock an unowned lock %p",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */
795
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 *	(This internal helper does not touch preemption; the wrappers do.)
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	/* on debug kernels, verify the caller actually owns the lock */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	/* release store publishes all writes made under the lock */
	os_atomic_store(&lock->lock_data, 0, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
816
/* Release the lock and drop the preemption disable taken at lock time. */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}
823
/* Release the lock; preemption state is left untouched. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
829
/*
 * Assert lock ownership (MACH_ASSERT kernels only; no-op otherwise).
 *
 * @param type	LCK_ASSERT_OWNED: panic unless the current thread holds
 *		the lock; LCK_ASSERT_NOTOWNED: panic if it does.
 */
void
hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
{
#if MACH_ASSERT
	thread_t thread, holder;

	holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
	thread = current_thread();

	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("hw_lock_assert(): invalid arg (%u)", type);
	}
#endif /* MACH_ASSERT */
}
855
/*
 *	Routine hw_lock_held, doesn't change preemption state.
 *	N.B. Racy, of course.
 *	Returns nonzero when the lock word is currently nonzero (held).
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return ordered_load_hw(lock) != 0;
}
865
/*
 * Contended spin loop for bit locks, governed by policy `pol`.
 * Returns HW_LOCK_ACQUIRED on success, HW_LOCK_CONTENDED if the
 * policy's timeout handler gave up.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t *lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/*
			 * hw_lock_trylock_bit() returns bool; this relies
			 * on HW_LOCK_ACQUIRED/HW_LOCK_CONTENDED matching
			 * true/false (consistent with the assert below) —
			 * NOTE(review): confirm against the enum definition.
			 */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
909
/*
 * Common acquire path for bit locks: fast-path trylock, falling back
 * to the contended spin loop governed by `pol`.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t *lock,
	unsigned int bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
925
/*
 *	Routine: hw_lock_bit_to
 *
 *	Acquire bit lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled (even when acquisition failed).
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t * lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}
944
/*
 *	Routine: hw_lock_bit
 *
 *	Acquire bit lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	/* default policy panics on timeout, so the result can be ignored */
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
957
/*
 *	Routine: hw_lock_bit_nopreempt
 *
 *	Acquire bit lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
969
970
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 boolean_t success = false;
976
977 _disable_preemption();
978 success = hw_lock_trylock_bit(lock, bit, false);
979 if (!success) {
980 lock_enable_preemption();
981 }
982
983 if (success) {
984 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 }
986
987 return success;
988 }
989
/*
 * Clear bit `bit` with release semantics, publishing all writes made
 * while the bit lock was held.  Preemption is handled by the wrappers.
 */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
998
/*
 *	Routine: hw_unlock_bit
 *
 *	Release spin-lock. The second parameter is the bit number to test and set.
 *	Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}
1011
/* Release a bit lock without touching preemption (asserted disabled). */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
1018
1019
1020 #pragma mark - lck_*_sleep
1021
/*
 *	Routine: lck_spin_sleep
 *
 *	Assert a wait on `event`, drop the spin lock, and block.  On
 *	wakeup the lock is re-taken unless LCK_SLEEP_UNLOCK was passed.
 *	Returns the wait result from assert_wait()/thread_block().
 */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible,
	lck_grp_t *grp)
{
	wait_result_t res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		/* wait is registered, so it is safe to drop the lock now */
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}
1052
/*
 *	Routine:	lck_spin_sleep
 *
 *	Thin wrapper around lck_spin_sleep_grp() with no lock-group
 *	attribution (LCK_GRP_NULL).
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1062
1063 /*
1064 * Routine: lck_spin_sleep_deadline
1065 */
1066 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1067 lck_spin_sleep_deadline(
1068 lck_spin_t *lck,
1069 lck_sleep_action_t lck_sleep_action,
1070 event_t event,
1071 wait_interrupt_t interruptible,
1072 uint64_t deadline)
1073 {
1074 wait_result_t res;
1075
1076 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1077 panic("Invalid lock sleep action %x", lck_sleep_action);
1078 }
1079
1080 res = assert_wait_deadline(event, interruptible, deadline);
1081 if (res == THREAD_WAITING) {
1082 lck_spin_unlock(lck);
1083 res = thread_block(THREAD_CONTINUE_NULL);
1084 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1085 lck_spin_lock(lck);
1086 }
1087 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1088 lck_spin_unlock(lck);
1089 }
1090
1091 return res;
1092 }
1093
1094 /*
1095 * Routine: lck_mtx_sleep
1096 */
/*
 *	Routine:	lck_mtx_sleep
 *
 *	Sleep on `event` while temporarily dropping the mutex `lck`.
 *	lck_sleep_action controls:
 *	 - LCK_SLEEP_UNLOCK: leave the mutex unlocked on return;
 *	 - LCK_SLEEP_SPIN / LCK_SLEEP_SPIN_ALWAYS: re-acquire in spin mode;
 *	 - LCK_SLEEP_PROMOTED_PRI: hold a priority floor across the sleep.
 *	Returns the wait result from assert_wait()/thread_block().
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	wait_result_t res;
	thread_pri_floor_t token; /* only initialized when LCK_SLEEP_PROMOTED_PRI is set */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		/* unlock only after the wait is asserted so no wakeup is lost */
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1149
1150
1151 /*
1152 * Routine: lck_mtx_sleep_deadline
1153 */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 lck_mtx_t *lck,
1157 lck_sleep_action_t lck_sleep_action,
1158 event_t event,
1159 wait_interrupt_t interruptible,
1160 uint64_t deadline)
1161 {
1162 wait_result_t res;
1163 thread_pri_floor_t token;
1164
1165 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167
1168 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 panic("Invalid lock sleep action %x", lck_sleep_action);
1170 }
1171
1172 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 /*
1174 * See lck_mtx_sleep().
1175 */
1176 token = thread_priority_floor_start();
1177 }
1178
1179 res = assert_wait_deadline(event, interruptible, deadline);
1180 if (res == THREAD_WAITING) {
1181 lck_mtx_unlock(lck);
1182 res = thread_block(THREAD_CONTINUE_NULL);
1183 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 lck_mtx_lock_spin(lck);
1186 } else {
1187 lck_mtx_lock(lck);
1188 }
1189 }
1190 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 lck_mtx_unlock(lck);
1192 }
1193
1194 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 thread_priority_floor_end(&token);
1196 }
1197
1198 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199
1200 return res;
1201 }
1202
1203 /*
1204 * sleep_with_inheritor and wakeup_with_inheritor KPI
1205 *
1206 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207 * the latest thread specified as inheritor.
1208 *
 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore it is the caller's responsibility to call a
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1212 *
1213 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214 *
 * Turnstile requires a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1219 *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
1221 * spinlock as the turnstile interlock. Because we do not want to hold interrupt disabled while holding the bucket interlock a new turnstile kernel hash table
1222 * is instantiated for this KPI to manage the hash without interrupt disabled.
1223 * Also:
1224 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225 * - every event will have a dedicated wait_queue.
1226 *
1227 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229 */
1230
/*
 * Core of the wakeup_with_inheritor KPI.
 *
 * Wakes either one waiter (transferring the push to it unless
 * action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) or all waiters (dropping
 * the push) on the hash-table turnstile associated with `event`.
 *
 * Returns KERN_SUCCESS if a waiter was woken, KERN_NOT_WAITING
 * otherwise. When wake_one and thread_wokenup is non-NULL, the woken
 * thread's reference is returned through it; otherwise it is dropped.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t event,
	wait_result_t result,
	bool wake_one,
	lck_wake_action_t action,
	thread_t *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;
	thread_t wokeup;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (wake_one) {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(event), result, flags);
		if (wokeup != NULL) {
			if (thread_wokenup != NULL) {
				*thread_wokenup = wokeup;
			} else {
				thread_deallocate_safe(wokeup);
			}
			ret = KERN_SUCCESS;
			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
				/* push stays with the old inheritor: no chain update needed */
				goto complete;
			}
		} else {
			if (thread_wokenup != NULL) {
				*thread_wokenup = NULL;
			}
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			ret = KERN_NOT_WAITING;
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor either is NULL, or is a thread that has just
	 * been woken up and has not blocked, because it is racing with the same
	 * interlock used here after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor, the
	 * old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We could anyway optimize this by asking the turnstile to tell us
	 * if the old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1321
/*
 * Core of the sleep_with_inheritor KPI.
 *
 * Takes the turnstile-hash bucket spinlock as turnstile interlock,
 * hands off from the caller's primitive (primitive_unlock), pushes on
 * `inheritor` through a hash-table turnstile while blocked on `event`,
 * then re-acquires the caller's primitive (primitive_lock) before
 * returning the wait result. Callers that want the primitive left
 * dropped pass an empty primitive_lock block.
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline,
	void (^primitive_lock)(void),
	void (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	primitive_lock();

	return ret;
}
1378
1379 /*
1380 * change_sleep_inheritor is independent from the locking primitive.
1381 */
1382
1383 /*
1384 * Name: change_sleep_inheritor
1385 *
1386 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1387 *
1388 * Args:
1389 * Arg1: event to redirect the push.
1390 * Arg2: new inheritor for event.
1391 *
1392 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1393 *
1394 * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1395 * wakeup for the event is called.
1396 * NOTE: this cannot be called from interrupt context.
1397 */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (!turnstile_has_waiters(ts)) {
		/* report it to the caller, but still redirect the push below */
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1439
1440 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1441 lck_spin_sleep_with_inheritor(
1442 lck_spin_t *lock,
1443 lck_sleep_action_t lck_sleep_action,
1444 event_t event,
1445 thread_t inheritor,
1446 wait_interrupt_t interruptible,
1447 uint64_t deadline)
1448 {
1449 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1450 return sleep_with_inheritor_and_turnstile(event, inheritor,
1451 interruptible, deadline,
1452 ^{}, ^{ lck_spin_unlock(lock); });
1453 } else {
1454 return sleep_with_inheritor_and_turnstile(event, inheritor,
1455 interruptible, deadline,
1456 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1457 }
1458 }
1459
1460 wait_result_t
hw_lck_ticket_sleep_with_inheritor(hw_lck_ticket_t * lock,lck_grp_t * grp __unused,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1461 hw_lck_ticket_sleep_with_inheritor(
1462 hw_lck_ticket_t *lock,
1463 lck_grp_t *grp __unused,
1464 lck_sleep_action_t lck_sleep_action,
1465 event_t event,
1466 thread_t inheritor,
1467 wait_interrupt_t interruptible,
1468 uint64_t deadline)
1469 {
1470 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1471 return sleep_with_inheritor_and_turnstile(event, inheritor,
1472 interruptible, deadline,
1473 ^{}, ^{ hw_lck_ticket_unlock(lock); });
1474 } else {
1475 return sleep_with_inheritor_and_turnstile(event, inheritor,
1476 interruptible, deadline,
1477 ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
1478 }
1479 }
1480
1481 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1482 lck_ticket_sleep_with_inheritor(
1483 lck_ticket_t *lock,
1484 lck_grp_t *grp,
1485 lck_sleep_action_t lck_sleep_action,
1486 event_t event,
1487 thread_t inheritor,
1488 wait_interrupt_t interruptible,
1489 uint64_t deadline)
1490 {
1491 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1492 return sleep_with_inheritor_and_turnstile(event, inheritor,
1493 interruptible, deadline,
1494 ^{}, ^{ lck_ticket_unlock(lock); });
1495 } else {
1496 return sleep_with_inheritor_and_turnstile(event, inheritor,
1497 interruptible, deadline,
1498 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1499 }
1500 }
1501
1502 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1503 lck_mtx_sleep_with_inheritor(
1504 lck_mtx_t *lock,
1505 lck_sleep_action_t lck_sleep_action,
1506 event_t event,
1507 thread_t inheritor,
1508 wait_interrupt_t interruptible,
1509 uint64_t deadline)
1510 {
1511 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1512
1513 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1514 return sleep_with_inheritor_and_turnstile(event,
1515 inheritor,
1516 interruptible,
1517 deadline,
1518 ^{;},
1519 ^{lck_mtx_unlock(lock);});
1520 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1521 return sleep_with_inheritor_and_turnstile(event,
1522 inheritor,
1523 interruptible,
1524 deadline,
1525 ^{lck_mtx_lock_spin(lock);},
1526 ^{lck_mtx_unlock(lock);});
1527 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1528 return sleep_with_inheritor_and_turnstile(event,
1529 inheritor,
1530 interruptible,
1531 deadline,
1532 ^{lck_mtx_lock_spin_always(lock);},
1533 ^{lck_mtx_unlock(lock);});
1534 } else {
1535 return sleep_with_inheritor_and_turnstile(event,
1536 inheritor,
1537 interruptible,
1538 deadline,
1539 ^{lck_mtx_lock(lock);},
1540 ^{lck_mtx_unlock(lock);});
1541 }
1542 }
1543
1544 /*
1545 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1546 */
1547
1548 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1549 lck_rw_sleep_with_inheritor(
1550 lck_rw_t *lock,
1551 lck_sleep_action_t lck_sleep_action,
1552 event_t event,
1553 thread_t inheritor,
1554 wait_interrupt_t interruptible,
1555 uint64_t deadline)
1556 {
1557 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1558
1559 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1560
1561 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1562 return sleep_with_inheritor_and_turnstile(event,
1563 inheritor,
1564 interruptible,
1565 deadline,
1566 ^{;},
1567 ^{lck_rw_type = lck_rw_done(lock);});
1568 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1569 return sleep_with_inheritor_and_turnstile(event,
1570 inheritor,
1571 interruptible,
1572 deadline,
1573 ^{lck_rw_lock(lock, lck_rw_type);},
1574 ^{lck_rw_type = lck_rw_done(lock);});
1575 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1576 return sleep_with_inheritor_and_turnstile(event,
1577 inheritor,
1578 interruptible,
1579 deadline,
1580 ^{lck_rw_lock_exclusive(lock);},
1581 ^{lck_rw_type = lck_rw_done(lock);});
1582 } else {
1583 return sleep_with_inheritor_and_turnstile(event,
1584 inheritor,
1585 interruptible,
1586 deadline,
1587 ^{lck_rw_lock_shared(lock);},
1588 ^{lck_rw_type = lck_rw_done(lock);});
1589 }
1590 }
1591
1592 /*
1593 * wakeup_with_inheritor functions are independent from the locking primitive.
1594 */
1595
1596 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1597 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1598 {
1599 return wakeup_with_inheritor_and_turnstile(event,
1600 result,
1601 TRUE,
1602 action,
1603 thread_wokenup);
1604 }
1605
1606 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1607 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1608 {
1609 return wakeup_with_inheritor_and_turnstile(event,
1610 result,
1611 FALSE,
1612 0,
1613 NULL);
1614 }
1615
/*
 * Debugger (kdp) callback: report the thread currently holding the
 * push for a sleep_with_inheritor wait, so stackshot can attribute
 * the block to its inheritor.
 */
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	waitinfo->owner = 0;
	waitinfo->context = 0;

	if (waitq_held(waitq)) {
		/* interlock busy: not safe to inspect the turnstile from kdp */
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1632
/* layout invariants the cond_sleep_with_inheritor code relies on:
 * the owner field holds exactly a ctid, and the condition structs
 * overlay a plain 32/64-bit word for the atomic loads below. */
static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1636
/*
 * Core of the cond_sleep_with_inheritor KPI.
 *
 * Under the turnstile-hash bucket interlock, runs cond_sleep_check();
 * if the check fails, the thread does not sleep and THREAD_NOT_WAITING
 * is returned. Otherwise the ctid reported by the check identifies the
 * inheritor that receives the push while this thread blocks on `cond`.
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1703
1704 /*
1705 * Name: cond_sleep_with_inheritor32_mask
1706 *
1707 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1708 * Allows a thread to conditionally sleep while indicating which thread should
1709 * inherit the priority push associated with the condition.
1710 * The condition should be expressed through a cond_swi_var32_s pointer.
1711 * The condition needs to be populated by the caller with the ctid of the
1712 * thread that should inherit the push. The remaining bits of the condition
1713 * can be used by the caller to implement its own synchronization logic.
1714 * A copy of the condition value observed by the caller when it decided to call
1715 * this function should be provided to prevent races with matching wakeups.
1716 * This function will atomically check the value stored in the condition against
1717 * the expected/observed one provided only for the bits that are set in the mask.
1718 * If the check doesn't pass the thread will not sleep and the function will return.
1719 * The ctid provided in the condition will be used only after a successful
1720 * check.
1721 *
1722 * Args:
1723 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1724 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1725 * Arg3: mask to apply to the condition to check.
1726 * Arg4: interruptible flag for wait.
1727 * Arg5: deadline for wait.
1728 *
1729 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1730 * wakeup for the cond is called.
1731 *
1732 * Returns: result of the wait.
1733 */
1734 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1735 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1736 {
1737 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1738 cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1739 bool ret;
1740 if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1741 ret = true;
1742 *ctid = cond_val.cond32_owner;
1743 } else {
1744 ret = false;
1745 }
1746 return ret;
1747 };
1748
1749 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1750 }
1751
1752 /*
1753 * Name: cond_sleep_with_inheritor64_mask
1754 *
1755 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1756 * Allows a thread to conditionally sleep while indicating which thread should
1757 * inherit the priority push associated with the condition.
1758 * The condition should be expressed through a cond_swi_var64_s pointer.
1759 * The condition needs to be populated by the caller with the ctid of the
1760 * thread that should inherit the push. The remaining bits of the condition
1761 * can be used by the caller to implement its own synchronization logic.
1762 * A copy of the condition value observed by the caller when it decided to call
1763 * this function should be provided to prevent races with matching wakeups.
1764 * This function will atomically check the value stored in the condition against
1765 * the expected/observed one provided only for the bits that are set in the mask.
1766 * If the check doesn't pass the thread will not sleep and the function will return.
1767 * The ctid provided in the condition will be used only after a successful
1768 * check.
1769 *
1770 * Args:
1771 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1772 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1773 * Arg3: mask to apply to the condition to check.
1774 * Arg4: interruptible flag for wait.
1775 * Arg5: deadline for wait.
1776 *
1777 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1778 * wakeup for the cond is called.
1779 *
1780 * Returns: result of the wait.
1781 */
1782 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1783 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1784 {
1785 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1786 cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1787 bool ret;
1788 if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1789 ret = true;
1790 *ctid = cond_val.cond64_owner;
1791 } else {
1792 ret = false;
1793 }
1794 return ret;
1795 };
1796
1797 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1798 }
1799
1800 /*
1801 * Name: cond_sleep_with_inheritor32
1802 *
1803 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1804 * Allows a thread to conditionally sleep while indicating which thread should
1805 * inherit the priority push associated with the condition.
1806 * The condition should be expressed through a cond_swi_var32_s pointer.
1807 * The condition needs to be populated by the caller with the ctid of the
1808 * thread that should inherit the push. The remaining bits of the condition
1809 * can be used by the caller to implement its own synchronization logic.
1810 * A copy of the condition value observed by the caller when it decided to call
1811 * this function should be provided to prevent races with matching wakeups.
1812 * This function will atomically check the value stored in the condition against
1813 * the expected/observed one provided. If the check doesn't pass the thread will not
1814 * sleep and the function will return.
1815 * The ctid provided in the condition will be used only after a successful
1816 * check.
1817 *
1818 * Args:
1819 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1820 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1821 * Arg3: interruptible flag for wait.
1822 * Arg4: deadline for wait.
1823 *
1824 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1825 * wakeup for the cond is called.
1826 *
1827 * Returns: result of the wait.
1828 */
wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* full-word comparison: every bit of the condition must match */
	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
}
1834
1835 /*
1836 * Name: cond_sleep_with_inheritor64
1837 *
1838 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1839 * Allows a thread to conditionally sleep while indicating which thread should
1840 * inherit the priority push associated with the condition.
1841 * The condition should be expressed through a cond_swi_var64_s pointer.
1842 * The condition needs to be populated by the caller with the ctid of the
1843 * thread that should inherit the push. The remaining bits of the condition
1844 * can be used by the caller to implement its own synchronization logic.
1845 * A copy of the condition value observed by the caller when it decided to call
1846 * this function should be provided to prevent races with matching wakeups.
1847 * This function will atomically check the value stored in the condition against
1848 * the expected/observed one provided. If the check doesn't pass the thread will not
1849 * sleep and the function will return.
1850 * The ctid provided in the condition will be used only after a successful
1851 * check.
1852 *
1853 * Args:
1854 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1855 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1856 * Arg3: interruptible flag for wait.
1857 * Arg4: deadline for wait.
1858 *
1859 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1860 * wakeup for the cond is called.
1861 *
1862 * Returns: result of the wait.
1863 */
wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* full-word comparison: every bit of the condition must match */
	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
}
1869
1870 /*
1871 * Name: cond_wakeup_one_with_inheritor
1872 *
1873 * Description: Wake up one waiter waiting on the condition (if any).
1874 * The thread woken up will be the one with the higher sched priority waiting on the condition.
1875 * The push for the condition will be transferred from the last inheritor to the woken up thread.
1876 *
1877 * Args:
1878 * Arg1: condition to wake from.
1879 * Arg2: wait result to pass to the woken up thread.
 * Arg3: pointer for storing the thread woken up.
1881 *
1882 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1883 *
 * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
 * condition or a wakeup for the event is called.
 * A reference for the woken-up thread is acquired.
1887 * NOTE: this cannot be called from interrupt context.
1888 */
kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
{
	/*
	 * TRUE selects the wake-one path: only the highest priority waiter is
	 * woken and the turnstile push is transferred to it (see header above).
	 */
	return wakeup_with_inheritor_and_turnstile((event_t)cond,
	           result,
	           TRUE,
	           action,
	           thread_wokenup);
}
1898
1899 /*
1900 * Name: cond_wakeup_all_with_inheritor
1901 *
1902 * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1903 *
1904 * Args:
1905 * Arg1: condition to wake from.
1906 * Arg2: wait result to pass to the woken up threads.
1907 *
1908 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1909 *
1910 * Conditions: NOTE: this cannot be called from interrupt context.
1911 */
kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
{
	/*
	 * FALSE selects the wake-all path; no single thread inherits the push,
	 * so no action/thread out-parameter is needed.
	 */
	return wakeup_with_inheritor_and_turnstile((event_t)cond,
	           result,
	           FALSE,
	           0,
	           NULL);
}
1921
1922
1923 #pragma mark - gates
1924
#define GATE_TYPE 3
#define GATE_ILOCK_BIT 0
#define GATE_WAITERS_BIT 1

/*
 * gt_data layout:
 *  - bit 0 (GATE_ILOCK): interlock, taken/released with hw_lock_bit/hw_unlock_bit.
 *  - bit 1 (GATE_WAITERS): set while threads are waiting on the gate turnstile.
 *  - remaining bits: the holder's thread_t pointer (0 when the gate is open).
 * This packing relies on thread_t pointers leaving the two low bits free
 * (i.e. at least 4-byte alignment).
 */
#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* Extract / build the holder portion of gt_data. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel stored in the holder bits of gt_data once the gate is destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate) ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred work (e.g. turnstile_cleanup) to run after dropping the interlock. */
typedef void (*void_func_void)(void);
1949
/* Panic path: gt_type did not match GATE_TYPE — memory is not a valid gate. */
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1956
/* Panic path: the gate's holder bits carry the GATE_DESTROYED sentinel. */
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1963
/*
 * Sanity-checks a gate before use: valid type tag, not destroyed,
 * and at least one outstanding reference. Panics on any violation.
 */
static void
gate_verify(gate_t *gate)
{
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	assert(gate->gt_refs > 0);
}
1976
/* Panic path: close attempted on a gate that already has a holder. */
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
1983
1984 static kern_return_t
gate_try_close(gate_t * gate)1985 gate_try_close(gate_t *gate)
1986 {
1987 uintptr_t state;
1988 thread_t holder;
1989 kern_return_t ret;
1990 thread_t thread = current_thread();
1991
1992 gate_verify(gate);
1993
1994 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
1995 return KERN_SUCCESS;
1996 }
1997
1998 gate_ilock(gate);
1999 state = ordered_load_gate(gate);
2000 holder = GATE_STATE_TO_THREAD(state);
2001
2002 if (holder == NULL) {
2003 assert(gate_has_waiter_bit(state) == FALSE);
2004
2005 state = GATE_THREAD_TO_STATE(current_thread());
2006 state |= GATE_ILOCK;
2007 ordered_store_gate(gate, state);
2008 ret = KERN_SUCCESS;
2009 } else {
2010 if (holder == current_thread()) {
2011 gate_already_owned_panic(gate, holder);
2012 }
2013 ret = KERN_FAILURE;
2014 }
2015
2016 gate_iunlock(gate);
2017 return ret;
2018 }
2019
/*
 * Closes the gate, recording the calling thread as holder.
 * Panics if the gate is already closed (by anyone).
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: fully open gate (gt_data == 0) closed with one cmpxchg. */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	/* Slow path under the interlock. */
	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* An open gate must not have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Become the holder, preserving the interlock bit until gate_iunlock. */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2049
/*
 * Wakes up all waiters of the gate and clears the turnstile inheritor.
 * Called from gate_open() with the gate interlock held and the waiters
 * bit set.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/* Wake everybody; WAITQ_UPDATE_INHERITOR resets the inheritor for us. */
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2070
/*
 * Panic path: open/handoff attempted by a thread that is not the holder.
 * 'open' selects the message (true = open, false = handoff).
 */
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2081
/*
 * Opens the gate, waking up all waiters if any.
 * Must be called by the current holder; panics otherwise.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: we are the holder and there are no waiters — one cmpxchg. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiters bits; interlock bit stays until gate_iunlock. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2113
/*
 * Turnstile half of gate_handoff(): wakes the highest priority waiter and
 * reports it through *thread_woken_up (with a reference taken by
 * waitq_wakeup64_identify). *waiters is set to whether more waiters remain.
 * Called with the gate interlock held and the waiters bit set.
 *
 * Returns: KERN_SUCCESS if a waiter was woken, KERN_NOT_WAITING otherwise.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2177
2178 static kern_return_t
gate_handoff(gate_t * gate,int flags)2179 gate_handoff(gate_t *gate,
2180 int flags)
2181 {
2182 kern_return_t ret;
2183 thread_t new_holder = NULL;
2184 uintptr_t state;
2185 thread_t holder;
2186 bool waiters;
2187 thread_t thread = current_thread();
2188
2189 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2190 gate_verify(gate);
2191
2192 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2193 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2194 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2195 return KERN_NOT_WAITING;
2196 }
2197 }
2198
2199 gate_ilock(gate);
2200 state = ordered_load_gate(gate);
2201 holder = GATE_STATE_TO_THREAD(state);
2202 waiters = gate_has_waiter_bit(state);
2203
2204 if (holder != current_thread()) {
2205 gate_not_owned_panic(gate, holder, false);
2206 }
2207
2208 if (waiters) {
2209 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2210 if (ret == KERN_SUCCESS) {
2211 state = GATE_THREAD_TO_STATE(new_holder);
2212 if (waiters) {
2213 state |= GATE_WAITERS;
2214 }
2215 } else {
2216 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2217 state = 0;
2218 }
2219 }
2220 } else {
2221 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2222 state = 0;
2223 }
2224 ret = KERN_NOT_WAITING;
2225 }
2226 state |= GATE_ILOCK;
2227 ordered_store_gate(gate, state);
2228
2229 gate_iunlock(gate);
2230
2231 if (new_holder) {
2232 thread_deallocate(new_holder);
2233 }
2234 return ret;
2235 }
2236
/*
 * Turnstile half of gate_steal(): makes new_inheritor the turnstile
 * inheritor. Called with the gate interlock held.
 *
 * Returns: turnstile_cleanup, which the caller must invoke after dropping
 * the interlock (see comment below).
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2255
/*
 * Panic path: wait/steal attempted on an open gate.
 * 'wait' selects the message (true = wait, false = steal).
 */
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2266
/*
 * Forcibly takes ownership of a closed gate from its current holder,
 * making the calling thread holder (and turnstile inheritor if there
 * are waiters). Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	/* Deferred turnstile_cleanup to run after dropping the interlock. */
	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Re-point the turnstile push at the new holder. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2301
/*
 * Turnstile half of gate_wait(): blocks on the gate turnstile with 'holder'
 * as inheritor. Entered with the gate interlock held; drops it across
 * thread_block() and re-takes it before returning.
 * *wait receives the thread_block result, *waiters whether waiters remain.
 *
 * Returns: turnstile_cleanup, to be run by the caller after dropping
 * the interlock.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	/* Drop the gate interlock before blocking. */
	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||   // thread interrupted or timedout
		    holder == current_thread())) {   // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2355
/* Returns a gate allocated by gate_alloc_init() to its zone. */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2361
/* Panic path: gt_refs would overflow UINT16_MAX (see gate_wait). */
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2368
/*
 * Waits until the gate is handed off to the caller or opened (or the wait
 * is interrupted / times out). The primitive lock protecting the gate is
 * released via primitive_unlock() for the duration of the sleep and
 * re-taken via primitive_lock() before returning.
 * Panics if the gate is open on entry.
 *
 * Returns: GATE_HANDOFF, GATE_OPENED, GATE_INTERRUPTED or GATE_TIMED_OUT.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	/* Blocks; returns with the gate interlock re-held. */
	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Keep the waiters bit in sync with the turnstile's view. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken above; remember whether we must free the gate. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate was destroyed while we slept (gate_free / gate_destroy). */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			/* Last waiter out frees the heap-allocated gate. */
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2492
2493 static void
gate_assert(gate_t * gate,int flags)2494 gate_assert(gate_t *gate, int flags)
2495 {
2496 uintptr_t state;
2497 thread_t holder;
2498
2499 gate_verify(gate);
2500
2501 gate_ilock(gate);
2502 state = ordered_load_gate(gate);
2503 holder = GATE_STATE_TO_THREAD(state);
2504
2505 switch (flags) {
2506 case GATE_ASSERT_CLOSED:
2507 assert(holder != NULL);
2508 break;
2509 case GATE_ASSERT_OPEN:
2510 assert(holder == NULL);
2511 break;
2512 case GATE_ASSERT_HELD:
2513 assert(holder == current_thread());
2514 break;
2515 default:
2516 panic("invalid %s flag %d", __func__, flags);
2517 }
2518
2519 gate_iunlock(gate);
2520 }
2521
/* gate_init() type argument: GT_INIT_ALLOC marks a heap-allocated gate. */
enum {
	GT_INIT_DEFAULT = 0,
	GT_INIT_ALLOC
};
2526
2527 static void
gate_init(gate_t * gate,uint type)2528 gate_init(gate_t *gate, uint type)
2529 {
2530 bzero(gate, sizeof(gate_t));
2531
2532 gate->gt_data = 0;
2533 gate->gt_turnstile = NULL;
2534 gate->gt_refs = 1;
2535 switch (type) {
2536 case GT_INIT_ALLOC:
2537 gate->gt_alloc = 1;
2538 break;
2539 default:
2540 gate->gt_alloc = 0;
2541 break;
2542 }
2543 gate->gt_type = GATE_TYPE;
2544 gate->gt_flags_pad = 0;
2545 }
2546
/*
 * Allocates a gate from the KT_GATE zone and initializes it as
 * heap-allocated (GT_INIT_ALLOC). Z_NOFAIL: the allocation cannot fail.
 */
static gate_t*
gate_alloc_init(void)
{
	gate_t *gate;
	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
	gate_init(gate, GT_INIT_ALLOC);
	return gate;
}
2555
/* Panic path: destroy attempted while a thread still holds the gate. */
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2562
/* Panic path: destroy attempted while the waiters bit is still set. */
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2569
/*
 * Marks the gate destroyed (GATE_DESTROYED sentinel in the holder bits)
 * and drops the init reference. The gate must be open with no waiters;
 * panics otherwise.
 *
 * Returns: the remaining reference count (threads still returning from
 * gate_wait may hold refs; the last one frees an allocated gate).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still need
	 * to be available to let the
	 * last wokenup threads to clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2610
/* Panic path: gate_destroy() called on a gate from gate_alloc_init(). */
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2617
/*
 * Destroys a caller-embedded gate (initialized with GT_INIT_DEFAULT).
 * Heap-allocated gates must use gate_free() instead; panics if gt_alloc
 * is set.
 */
static void
gate_destroy(gate_t *gate)
{
	gate_verify(gate);
	if (gate->gt_alloc == 1) {
		gate_destroy_panic(gate);
	}
	gate_destroy_internal(gate);
}
2627
/* Panic path: gate_free() called on a gate not from gate_alloc_init(). */
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2634
/*
 * Destroys and frees a gate allocated by gate_alloc_init().
 * Panics if the gate was not heap-allocated (use gate_destroy() then).
 * If woken-up waiters still hold references, the actual free is delegated
 * to the last of them (see gate_wait).
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2658
2659 /*
2660 * Name: lck_rw_gate_init
2661 *
2662 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2663 *
2664 * Args:
2665 * Arg1: lck_rw_t lock used to protect the gate.
2666 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2667 */
void
lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
{
	/* The lock only documents which lck_rw_t protects the gate. */
	(void) lock;
	gate_init(gate, GT_INIT_DEFAULT);
}
2674
2675 /*
2676 * Name: lck_rw_gate_alloc_init
2677 *
2678 * Description: allocates and initializes a gate_t.
2679 *
2680 * Args:
2681 * Arg1: lck_rw_t lock used to protect the gate.
2682 *
2683 * Returns:
2684 * gate_t allocated.
2685 */
gate_t*
lck_rw_gate_alloc_init(lck_rw_t *lock)
{
	/* The lock only documents which lck_rw_t protects the gate. */
	(void) lock;
	return gate_alloc_init();
}
2692
2693 /*
2694 * Name: lck_rw_gate_destroy
2695 *
2696 * Description: destroys a variable previously initialized
2697 * with lck_rw_gate_init().
2698 *
2699 * Args:
2700 * Arg1: lck_rw_t lock used to protect the gate.
2701 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2702 */
void
lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
{
	/* The lock only documents which lck_rw_t protects the gate. */
	(void) lock;
	gate_destroy(gate);
}
2709
2710 /*
2711 * Name: lck_rw_gate_free
2712 *
2713 * Description: destroys and tries to free a gate previously allocated
2714 * with lck_rw_gate_alloc_init().
2715 * The gate free might be delegated to the last thread returning
2716 * from the gate_wait().
2717 *
2718 * Args:
2719 * Arg1: lck_rw_t lock used to protect the gate.
2720 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2721 */
void
lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
{
	/* The lock only documents which lck_rw_t protects the gate. */
	(void) lock;
	gate_free(gate);
}
2728
2729 /*
2730 * Name: lck_rw_gate_try_close
2731 *
2732 * Description: Tries to close the gate.
2733 * In case of success the current thread will be set as
2734 * the holder of the gate.
2735 *
2736 * Args:
2737 * Arg1: lck_rw_t lock used to protect the gate.
2738 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2739 *
2740 * Conditions: Lock must be held. Returns with the lock held.
2741 *
2742 * Returns:
2743 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2744 * of the gate.
2745 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2746 * to wake up possible waiters on the gate before returning to userspace.
2747 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2748 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2749 *
2750 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2751 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2752 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2753 * be done without dropping the lock that is protecting the gate in between.
2754 */
int
lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
{
	/* Caller must hold the protecting lock; see header comment above. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	return gate_try_close(gate);
}
2762
2763 /*
2764 * Name: lck_rw_gate_close
2765 *
2766 * Description: Closes the gate. The current thread will be set as
2767 * the holder of the gate. Will panic if the gate is already closed.
2768 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2769 * to wake up possible waiters on the gate before returning to userspace.
2770 *
2771 * Args:
2772 * Arg1: lck_rw_t lock used to protect the gate.
2773 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2774 *
2775 * Conditions: Lock must be held. Returns with the lock held.
2776 * The gate must be open.
2777 *
2778 */
2779 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2780 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2781 {
2782 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2783
2784 return gate_close(gate);
2785 }
2786
2787 /*
2788 * Name: lck_rw_gate_open
2789 *
2790 * Description: Opens the gate and wakes up possible waiters.
2791 *
2792 * Args:
2793 * Arg1: lck_rw_t lock used to protect the gate.
2794 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2795 *
2796 * Conditions: Lock must be held. Returns with the lock held.
2797 * The current thread must be the holder of the gate.
2798 *
2799 */
void
lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
{
	/* Caller must hold the protecting lock; see header comment above. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_open(gate);
}
2807
2808 /*
2809 * Name: lck_rw_gate_handoff
2810 *
2811 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2812 * priority will be selected as the new holder of the gate, and woken up,
2813 * with the gate remaining in the closed state throughout.
2814 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2815 * will be returned.
2816 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2817 * case no waiters were found.
2818 *
2819 *
2820 * Args:
2821 * Arg1: lck_rw_t lock used to protect the gate.
2822 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2823 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2824 *
2825 * Conditions: Lock must be held. Returns with the lock held.
2826 * The current thread must be the holder of the gate.
2827 *
2828 * Returns:
2829 * KERN_SUCCESS in case one of the waiters became the new holder.
2830 * KERN_NOT_WAITING in case there were no waiters.
2831 *
2832 */
kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
{
	/* Caller must hold the protecting lock; see header comment above. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	return gate_handoff(gate, flags);
}
2840
2841 /*
2842 * Name: lck_rw_gate_steal
2843 *
2844 * Description: Set the current ownership of the gate. It sets the current thread as the
2845 * new holder of the gate.
2846 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2847 * to wake up possible waiters on the gate before returning to userspace.
2848 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2849 * anymore.
2850 *
2851 *
2852 * Args:
2853 * Arg1: lck_rw_t lock used to protect the gate.
2854 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2855 *
2856 * Conditions: Lock must be held. Returns with the lock held.
2857 * The gate must be closed and the current thread must not already be the holder.
2858 *
2859 */
void
lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
{
	/* Caller must hold the protecting lock; see header comment above. */
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_steal(gate);
}
2867
2868 /*
2869 * Name: lck_rw_gate_wait
2870 *
2871 * Description: Waits for the current thread to become the holder of the gate or for the
2872 * gate to become open. An interruptible mode and deadline can be specified
2873 * to return earlier from the wait.
2874 *
2875 * Args:
2876 * Arg1: lck_rw_t lock used to protect the gate.
2877 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2878 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
2879 * Arg3: interruptible flag for wait.
2880 * Arg4: deadline
2881 *
2882 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2883 * Lock will be dropped while waiting.
2884 * The gate must be closed.
2885 *
2886 * Returns: Reason why the thread was woken up.
2887 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2889 * to wake up possible waiters on the gate before returning to userspace.
2890 * GATE_OPENED - the gate was opened by the holder.
2891 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2892 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2893 */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * Captured by the Blocks below: the unlock Block records how the rw
	 * lock was held (lck_rw_done() returns the mode it released), so the
	 * default re-lock Block can reacquire it in the same mode.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * gate_wait() takes two Blocks: the first drops the protecting lock
	 * before sleeping, the second reacquires it (or not) on wakeup.
	 * LCK_SLEEP_UNLOCK is checked first so it overrides the SHARED/
	 * EXCLUSIVE bits if both are passed.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants the lock left dropped: no-op re-lock Block. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* LCK_SLEEP_DEFAULT: reacquire in whatever mode it was held. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		/* Force exclusive reacquisition regardless of the prior mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		/* LCK_SLEEP_SHARED: force shared reacquisition. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2927
2928 /*
2929 * Name: lck_rw_gate_assert
2930 *
2931 * Description: asserts that the gate is in the specified state.
2932 *
2933 * Args:
2934 * Arg1: lck_rw_t lock used to protect the gate.
2935 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2936 * Arg3: flags to specified assert type.
2937 * GATE_ASSERT_CLOSED - the gate is currently closed
2938 * GATE_ASSERT_OPEN - the gate is currently opened
2939 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2940 */
2941 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2942 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2943 {
2944 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2945
2946 gate_assert(gate, flags);
2947 return;
2948 }
2949
2950 /*
2951 * Name: lck_mtx_gate_init
2952 *
2953 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2954 *
2955 * Args:
2956 * Arg1: lck_mtx_t lock used to protect the gate.
2957 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2958 */
2959 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2960 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2961 {
2962 (void) lock;
2963 gate_init(gate, GT_INIT_DEFAULT);
2964 }
2965
2966 /*
2967 * Name: lck_mtx_gate_alloc_init
2968 *
2969 * Description: allocates and initializes a gate_t.
2970 *
2971 * Args:
2972 * Arg1: lck_mtx_t lock used to protect the gate.
2973 *
2974 * Returns:
2975 * gate_t allocated.
2976 */
2977 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)2978 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
2979 {
2980 (void) lock;
2981 return gate_alloc_init();
2982 }
2983
2984 /*
2985 * Name: lck_mtx_gate_destroy
2986 *
2987 * Description: destroys a variable previously initialized
2988 * with lck_mtx_gate_init().
2989 *
2990 * Args:
2991 * Arg1: lck_mtx_t lock used to protect the gate.
2992 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2993 */
2994 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)2995 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
2996 {
2997 (void) lock;
2998 gate_destroy(gate);
2999 }
3000
3001 /*
3002 * Name: lck_mtx_gate_free
3003 *
3004 * Description: destroys and tries to free a gate previously allocated
3005 * with lck_mtx_gate_alloc_init().
3006 * The gate free might be delegated to the last thread returning
3007 * from the gate_wait().
3008 *
3009 * Args:
3010 * Arg1: lck_mtx_t lock used to protect the gate.
 * Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3012 */
3013 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3014 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3015 {
3016 (void) lock;
3017 gate_free(gate);
3018 }
3019
3020 /*
3021 * Name: lck_mtx_gate_try_close
3022 *
3023 * Description: Tries to close the gate.
3024 * In case of success the current thread will be set as
3025 * the holder of the gate.
3026 *
3027 * Args:
3028 * Arg1: lck_mtx_t lock used to protect the gate.
3029 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3030 *
3031 * Conditions: Lock must be held. Returns with the lock held.
3032 *
3033 * Returns:
3034 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3035 * of the gate.
3036 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3037 * to wake up possible waiters on the gate before returning to userspace.
3038 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3039 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3040 *
3041 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3042 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3043 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3044 * be done without dropping the lock that is protecting the gate in between.
3045 */
3046 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3047 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3048 {
3049 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3050
3051 return gate_try_close(gate);
3052 }
3053
3054 /*
3055 * Name: lck_mtx_gate_close
3056 *
3057 * Description: Closes the gate. The current thread will be set as
3058 * the holder of the gate. Will panic if the gate is already closed.
3059 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3060 * to wake up possible waiters on the gate before returning to userspace.
3061 *
3062 * Args:
3063 * Arg1: lck_mtx_t lock used to protect the gate.
3064 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3065 *
3066 * Conditions: Lock must be held. Returns with the lock held.
3067 * The gate must be open.
3068 *
3069 */
3070 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3071 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3072 {
3073 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3074
3075 return gate_close(gate);
3076 }
3077
3078 /*
3079 * Name: lck_mtx_gate_open
3080 *
 * Description: Opens the gate and wakes up possible waiters.
3082 *
3083 * Args:
3084 * Arg1: lck_mtx_t lock used to protect the gate.
3085 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3086 *
3087 * Conditions: Lock must be held. Returns with the lock held.
3088 * The current thread must be the holder of the gate.
3089 *
3090 */
3091 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3092 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3093 {
3094 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3095
3096 gate_open(gate);
3097 }
3098
3099 /*
3100 * Name: lck_mtx_gate_handoff
3101 *
3102 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3103 * priority will be selected as the new holder of the gate, and woken up,
3104 * with the gate remaining in the closed state throughout.
3105 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3106 * will be returned.
3107 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3108 * case no waiters were found.
3109 *
3110 *
3111 * Args:
3112 * Arg1: lck_mtx_t lock used to protect the gate.
3113 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3114 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3115 *
3116 * Conditions: Lock must be held. Returns with the lock held.
3117 * The current thread must be the holder of the gate.
3118 *
3119 * Returns:
3120 * KERN_SUCCESS in case one of the waiters became the new holder.
3121 * KERN_NOT_WAITING in case there were no waiters.
3122 *
3123 */
3124 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3125 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3126 {
3127 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3128
3129 return gate_handoff(gate, flags);
3130 }
3131
3132 /*
3133 * Name: lck_mtx_gate_steal
3134 *
3135 * Description: Steals the ownership of the gate. It sets the current thread as the
3136 * new holder of the gate.
3137 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3138 * to wake up possible waiters on the gate before returning to userspace.
3139 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3140 * anymore.
3141 *
3142 *
3143 * Args:
3144 * Arg1: lck_mtx_t lock used to protect the gate.
3145 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3146 *
3147 * Conditions: Lock must be held. Returns with the lock held.
3148 * The gate must be closed and the current thread must not already be the holder.
3149 *
3150 */
3151 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3152 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3153 {
3154 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3155
3156 gate_steal(gate);
3157 }
3158
3159 /*
3160 * Name: lck_mtx_gate_wait
3161 *
3162 * Description: Waits for the current thread to become the holder of the gate or for the
3163 * gate to become open. An interruptible mode and deadline can be specified
3164 * to return earlier from the wait.
3165 *
3166 * Args:
3167 * Arg1: lck_mtx_t lock used to protect the gate.
3168 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
3172 *
3173 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3174 * Lock will be dropped while waiting.
3175 * The gate must be closed.
3176 *
3177 * Returns: Reason why the thread was woken up.
3178 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3179 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3180 * to wake up possible waiters on the gate before returning to userspace.
3181 * GATE_OPENED - the gate was opened by the holder.
3182 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3183 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3184 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * gate_wait() takes two Blocks: the first drops the protecting mutex
	 * before sleeping, the second reacquires it (or not) on wakeup.
	 * LCK_SLEEP_UNLOCK is checked first so it overrides the SPIN flags if
	 * both are passed.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants the mutex left dropped: no-op re-lock Block. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		/* Reacquire in spin mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		/* Reacquire in spin-always mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		/* LCK_SLEEP_DEFAULT: reacquire as a regular blocking mutex. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3216
3217 /*
3218 * Name: lck_mtx_gate_assert
3219 *
3220 * Description: asserts that the gate is in the specified state.
3221 *
3222 * Args:
3223 * Arg1: lck_mtx_t lock used to protect the gate.
3224 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3225 * Arg3: flags to specified assert type.
3226 * GATE_ASSERT_CLOSED - the gate is currently closed
3227 * GATE_ASSERT_OPEN - the gate is currently opened
3228 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3229 */
3230 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3231 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3232 {
3233 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3234
3235 gate_assert(gate, flags);
3236 }
3237
3238 #pragma mark - LCK_*_DECLARE support
3239
3240 __startup_func
3241 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3242 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3243 {
3244 lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3245 }
3246
3247 __startup_func
3248 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3249 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3250 {
3251 lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3252 }
3253
3254 __startup_func
3255 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3256 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3257 {
3258 lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3259 }
3260
3261 __startup_func
3262 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3263 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3264 {
3265 simple_lock_init(sp->lck, sp->lck_init_arg);
3266 }
3267
3268 __startup_func
3269 void
lck_ticket_startup_init(struct lck_ticket_startup_spec * sp)3270 lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
3271 {
3272 lck_ticket_init(sp->lck, sp->lck_grp);
3273 }
3274