1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82
83 #include <sys/kdebug.h>
84
/* kdebug trace subcodes for the lck_mtx sleep / wait / wakeup paths */
#define LCK_MTX_SLEEP_CODE 0
#define LCK_MTX_SLEEP_DEADLINE_CODE 1
#define LCK_MTX_LCK_WAIT_CODE 2
#define LCK_MTX_UNLCK_WAKEUP_CODE 3

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

/* Trap on misaligned lock pointers when lock debugging is compiled in. */
#if MACH_LDEBUG
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p, t) do{}while(0)
#endif

#define NOINLINE __attribute__((noinline))

/* Compiler-ordered (not hardware-ordered) load/store of a lock word. */
#define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)

KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

/* Per-CPU scratch area describing an in-flight spinlock timeout panic. */
struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

/* When false, spinlock timeouts keep spinning instead of panicking. */
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);

#if CONFIG_PV_TICKET
SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
#endif

/* "lcks" boot-arg; debug builds default to lock debugging enabled. */
#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", LCK_OPTION_ENABLE_DEBUG);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

/* Spin duration above which DTrace lockstat records a spin event. */
#if CONFIG_DTRACE
#if defined (__x86_64__)
machine_timeout_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm64__)
MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif

struct lck_mcs PERCPU_DATA(lck_mcs);
134
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 if (__improbable(kdebug_enable)) {
140 return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 } else {
142 return 0;
143 }
144 }
145
/*
 * Out-of-line (__abortlike) panic for the no-preempt lock check below;
 * kept separate so the inline fast path stays small.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
151
/*
 * Enforce that "_nopreempt" lock entry points are called with
 * preemption already disabled; panics otherwise.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
159
160 #pragma mark - HW Spin policies
161
162 /*
163 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
164 */
/*
 * Snapshot the spin policy's timeout into a hw_spin_timeout_t.
 *
 * The raw timeout is loaded atomically (it may be tuned at runtime) and
 * scaled by the policy's shift.  Units are mach_absolute_time ticks on
 * arm and TSC ticks on Intel.  With SCHED_HYGIENE_DEBUG, the timeout is
 * clamped to the interrupt-masked timeout when spinning with interrupts
 * disabled outside the PPL, so the spinlock panic fires before the
 * interrupt-masked panic would.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		/* interrupt_masked_timeout is in ns; convert to TSC units */
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
200
/*
 * Whether the spin started inside the PPL; uses the value cached in the
 * timeout snapshot when available, otherwise asks the pmap directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
212
/*
 * Decide whether a contended spin on @lock should continue.
 *
 * Called from spin loops every LOCK_SNOOP_SPINS iterations.  A zero
 * timeout means "spin forever".  The first call past a zero deadline
 * arms hwss_start/hwss_deadline; once the deadline expires the policy's
 * hwsp_op_timeout callback is invoked (it may panic, give up, or extend
 * the deadline by returning HW_LOCK_TIMEOUT_CONTINUE).
 *
 * Returns true to keep spinning, false to abort the acquisition.
 */
bool
hw_spin_should_keep_spinning(
	void                   *lock,
	hw_spin_policy_t        pol,
	hw_spin_timeout_t       to,
	hw_spin_state_t        *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	if (__improbable(to.hwst_timeout == 0)) {
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	/* track interrupt time so the handler can discount stolen CPU time */
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		/* first evaluation for this spin: arm the deadline */
		state->hwss_start = now;
		state->hwss_deadline = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	/* hwsp_lock_offset rebases @lock to the start of the lock object */
	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
267
/*
 * Record (on DEBUG/DEVELOPMENT kernels) the owner observed when the
 * spin began, stripping the low 3 flag bits lock implementations may
 * set in the owner word.  No-op on release kernels.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
278
/*
 * Same as lck_spinlock_timeout_set_orig_owner() but for locks that
 * store a compact thread id (ctid) rather than a thread pointer.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}
290
/*
 * Gather diagnostic state once a spinlock timeout has fired, just
 * before panicking.
 *
 * @owner is either a ctid (values below 1 << CTID_SIZE_BIT) or a thread
 * pointer with up to 3 low flag bits set; it is normalized to a bare
 * thread pointer.  The info is published through the per-CPU
 * lck_spinlock_to_info record (release-ordered so a debugger/panic
 * consumer sees consistent fields), then the active CPUs are scanned to
 * find the one running the owner; on Intel that CPU is NMI'd so its
 * state appears in the panic report.
 *
 * Returns the filled-in per-CPU record for the caller's panic message.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
331
332 #pragma mark - HW locks
333
/*
 * Routine: hw_lock_init
 *
 * Initialize a hardware lock to the unlocked (zero) state.
 * Hibernate-text: may run before the full kernel is paged in.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}
344
/*
 * Single acquisition attempt used by contended spin loops: store
 * @newval (the owner state) if the lock word is 0, with acquire
 * ordering.
 *
 * On LL/SC architectures a failed attempt calls wait_for_event() while
 * the exclusive monitor is armed on the lock line, so the CPU sleeps
 * until the owner writes it — no explicit monitor clear is needed.
 * Returns true when the lock was taken.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
369
/*
 * Single attempt to atomically set bit @bit in @target (acquire
 * ordering).  Returns true when this caller set the bit, false when it
 * was already set.
 *
 * When the bit is held and @wait is true, the CPU waits for a write to
 * the line (wait_for_event on LL/SC, cpu_pause otherwise) before the
 * caller retries; when @wait is false the exclusive monitor is dropped
 * so a try-lock can bail out immediately.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
		        } else {
		                os_atomic_clear_exclusive();
		        }
#else
		        if (wait) {
		                cpu_pause();
		        }
#endif
		        return false;
		}
	});
	return true;
#else
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* the or may still lose a race; report success only if we set it */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
409
/*
 * Default hw_lock_t timeout handler: panic with whatever owner
 * information can safely be gathered.
 *
 * Inside the PPL no globals may be written, so the panic carries only
 * the raw owner pointer; otherwise lck_spinlock_timeout_hit() publishes
 * the owner/CPU details first (and NMIs the owner CPU on Intel).
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul;
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
448
/* Default hw_lock_t spin policy: panic after lock_panic_timeout. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name = "hw_lock_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_spin_timeout_panic,
};
454
455 static hw_spin_timeout_status_t
hw_spin_always_return(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)456 hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
457 {
458 #pragma unused(_lock, to, st)
459 return HW_LOCK_TIMEOUT_RETURN;
460 }
461
462 const struct hw_spin_policy hw_lock_spin_panic_policy = {
463 .hwsp_name = "hw_lock_t[panic]",
464 #if defined(__x86_64__)
465 .hwsp_timeout = &LockTimeOutTSC,
466 #else
467 .hwsp_timeout_atomic = &LockTimeOut,
468 #endif
469 .hwsp_timeout_shift = 2,
470 .hwsp_op_timeout = hw_spin_always_return,
471 };
472
#if DEBUG || DEVELOPMENT
/*
 * Short (100us) timeout backing the test give-up policy, armed at
 * startup by hw_lock_test_to_init().
 *
 * Fix: the policy previously pointed at LockTimeOutTSC/LockTimeOut with
 * a shift, which left hw_lock_test_to initialized but never used; it
 * now references the dedicated test timeout as intended.
 */
static machine_timeout_t hw_lock_test_to;
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name = "testing policy",
	.hwsp_timeout_atomic = &hw_lock_test_to,
	.hwsp_op_timeout = hw_spin_always_return,
};

__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	/* 100us, converted to TSC units on Intel */
	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
500
/*
 * Timeout handler for bit locks: panic with the raw lock word (bit
 * locks do not record an owner thread).
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

/* Default bit-lock policy: panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};
522
#if __arm64__
/* 0x3000000 timebase ticks ~= 2.1s with a 24MHz counter */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name = "hw_lock_bit_t",
	.hwsp_timeout = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout = hw_lock_bit_timeout_panic,
};
#endif
531
/*
 * Routine: hw_lock_lock_contended
 *
 * Spin until lock is acquired or timeout expires.
 * timeout is in mach_absolute_time ticks. Called with
 * preemption disabled.
 *
 * Returns HW_LOCK_ACQUIRED, or HW_LOCK_CONTENDED when the policy's
 * timeout handler aborted the spin.  Panics up front if the calling
 * thread already owns the lock (recursive acquisition).
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t       lock,
	uintptr_t       data,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	/* snoop in batches of LOCK_SNOOP_SPINS between deadline checks */
	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
593
/*
 * Timeout handler for hw_wait_while_equals32(): panic with the current
 * value at the watched address.
 */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name = "hw_wait_while_equals32",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals32_panic,
};
615
/*
 * Timeout handler for hw_wait_while_equals64(): panic with the current
 * value at the watched address.
 */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name = "hw_wait_while_equals64",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = hw_wait_while_equals64_panic,
};
637
/*
 * Spin until *@address differs from @current, panicking if the
 * lock_panic_timeout elapses; returns the first differing value.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint32_t v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
652
/*
 * 64-bit variant of hw_wait_while_equals32(): spin until *@address
 * differs from @current and return the new value.
 */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t pol = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	uint64_t v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
667
/*
 * Common acquisition body for hw_lock_lock()/hw_lock_to() and their
 * _nopreempt variants: fast-path try, then fall into the contended
 * spin with the caller's policy.  Preemption is already disabled.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t       lock,
	thread_t        thread,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}
685
/*
 * Routine: hw_lock_lock
 *
 * Acquire lock, spinning until it becomes available,
 * return with preemption disabled.
 * Uses the default policy, which panics on timeout.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
700
/*
 * Routine: hw_lock_lock_nopreempt
 *
 * Acquire lock, spinning until it becomes available.
 * Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
714
/*
 * Routine: hw_lock_to
 *
 * Acquire lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 * preemption disabled.
 * Returns nonzero (HW_LOCK_ACQUIRED) on success, 0 on timeout.
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
730
/*
 * Routine: hw_lock_to_nopreempt
 *
 * Acquire lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks, called and return with
 * preemption disabled (asserted on entry).
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
746
747 __result_use_check
748 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))749 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
750 {
751 if (__probable(lock_cmpxchg(&lock->lock_data, 0,
752 HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
753 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
754 return true;
755 }
756 return false;
757 }
758
/*
 * Routine: hw_lock_try
 *
 * returns with preemption disabled on success.
 * On failure, preemption is restored and 0 is returned.
 */
unsigned
int
(hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
	if (!success) {
		lock_enable_preemption();
	}
	return success;
}
776
/*
 * Try-lock variant for callers that already hold preemption disabled
 * (asserted); preemption state is left untouched either way.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
785
#if DEBUG || DEVELOPMENT
/*
 * Called from hw_lock_unlock_internal() when the unlocking thread is
 * not the recorded owner.
 *
 * Fix: the panic message previously read "trying to lock %p
 * recursively" — copy-pasted from the recursive-acquire panic — which
 * misdescribed this unlock-of-unowned-lock condition.
 */
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock %p it doesn't own",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */
795
/*
 * Routine: hw_lock_unlock
 *
 * Unconditionally release lock, release preemption level.
 * On DEBUG/DEVELOPMENT kernels, panics if the caller is not the
 * recorded owner (unless a debugger/panic is already active).
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	/* release ordering publishes the critical section to the next owner */
	os_atomic_store(&lock->lock_data, 0, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
816
/* Release @lock and drop the preemption-disable taken at acquisition. */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}
823
/* Release @lock without touching the caller-managed preemption level. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
829
830 void
hw_lock_assert(__assert_only hw_lock_t lock,__assert_only unsigned int type)831 hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
832 {
833 #if MACH_ASSERT
834 thread_t thread, holder;
835
836 holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
837 thread = current_thread();
838
839 if (type == LCK_ASSERT_OWNED) {
840 if (holder == 0) {
841 panic("Lock not owned %p = %p", lock, holder);
842 }
843 if (holder != thread) {
844 panic("Lock not owned by current thread %p = %p", lock, holder);
845 }
846 } else if (type == LCK_ASSERT_NOTOWNED) {
847 if (holder != THREAD_NULL && holder == thread) {
848 panic("Lock owned by current thread %p = %p", lock, holder);
849 }
850 } else {
851 panic("hw_lock_assert(): invalid arg (%u)", type);
852 }
853 #endif /* MACH_ASSERT */
854 }
855
/*
 * Routine hw_lock_held, doesn't change preemption state.
 * N.B. Racy, of course. Returns nonzero if the lock word is nonzero
 * at the moment of the load.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return ordered_load_hw(lock) != 0;
}
865
/*
 * Contended slow path for bit locks: spin in batches of
 * LOCK_SNOOP_SPINS attempts until the bit is taken or the policy's
 * timeout handler aborts the spin.
 *
 * Returns HW_LOCK_ACQUIRED on success, HW_LOCK_CONTENDED on abort.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t  *lock,
	uint32_t        bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t state = { };
	hw_lock_status_t rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/*
			 * NOTE(review): assigns a bool into hw_lock_status_t;
			 * relies on HW_LOCK_CONTENDED / HW_LOCK_ACQUIRED being
			 * 0 / 1 (consistent with the assert below) — confirm
			 * against the enum definition.
			 */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
909
/*
 * Common bit-lock acquisition body: fast-path try, then the contended
 * spin with the caller's policy.  Preemption is already disabled.
 * Returns HW_LOCK_ACQUIRED or HW_LOCK_CONTENDED (as unsigned).
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t  *lock,
	unsigned int    bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
925
/*
 * Routine: hw_lock_bit_to
 *
 * Acquire bit lock, spinning until it becomes available or timeout.
 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 * preemption disabled.
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t * lock,
	uint32_t bit,
	hw_spin_policy_t pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}
944
/*
 * Routine: hw_lock_bit
 *
 * Acquire bit lock, spinning until it becomes available,
 * return with preemption disabled.
 * Uses the default bit-lock policy, which panics on timeout.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
957
/*
 * Routine: hw_lock_bit_nopreempt
 *
 * Acquire bit lock, spinning until it becomes available.
 * Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
969
970
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 boolean_t success = false;
976
977 _disable_preemption();
978 success = hw_lock_trylock_bit(lock, bit, false);
979 if (!success) {
980 lock_enable_preemption();
981 }
982
983 if (success) {
984 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 }
986
987 return success;
988 }
989
/*
 * Clear bit @bit with release ordering, publishing the critical
 * section to the next acquirer, and record the lockstat event.
 */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
998
/*
 * Routine: hw_unlock_bit
 *
 * Release spin-lock. The second parameter is the bit number to test and set.
 * Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}
1011
/*
 * Release the bit lock without touching the caller-managed preemption
 * level (which must already be disabled; asserted).
 */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
1018
1019
1020 #pragma mark - lck_*_sleep
1021
/*
 * Routine: lck_spin_sleep
 *
 * Assert a wait on @event, drop @lck, and block.  Unless
 * LCK_SLEEP_UNLOCK is set, the lock is re-acquired (attributed to
 * @grp) before returning.  Panics on sleep-action bits outside
 * LCK_SLEEP_MASK.  Returns the thread_block()/assert_wait() result.
 */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	lck_grp_t               *grp)
{
	wait_result_t res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* wait refused (e.g. pending interrupt): honor UNLOCK anyway */
		lck_spin_unlock(lck);
	}

	return res;
}
1052
/*
 * Routine:	lck_spin_sleep
 *
 * Convenience wrapper around lck_spin_sleep_grp with no lock group.
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1062
1063 /*
1064 * Routine: lck_spin_sleep_deadline
1065 */
1066 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1067 lck_spin_sleep_deadline(
1068 lck_spin_t *lck,
1069 lck_sleep_action_t lck_sleep_action,
1070 event_t event,
1071 wait_interrupt_t interruptible,
1072 uint64_t deadline)
1073 {
1074 wait_result_t res;
1075
1076 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1077 panic("Invalid lock sleep action %x", lck_sleep_action);
1078 }
1079
1080 res = assert_wait_deadline(event, interruptible, deadline);
1081 if (res == THREAD_WAITING) {
1082 lck_spin_unlock(lck);
1083 res = thread_block(THREAD_CONTINUE_NULL);
1084 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1085 lck_spin_lock(lck);
1086 }
1087 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1088 lck_spin_unlock(lck);
1089 }
1090
1091 return res;
1092 }
1093
/*
 * Routine:	lck_mtx_sleep
 *
 * Sleep on event while holding the mutex lck.  The mutex is dropped for
 * the duration of the wait and, unless LCK_SLEEP_UNLOCK is set,
 * reacquired before returning (in spin mode when LCK_SLEEP_SPIN or
 * LCK_SLEEP_SPIN_ALWAYS is set).
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t *lck,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	wait_interrupt_t interruptible)
{
	wait_result_t res;
	thread_pri_floor_t token;	/* valid only when LCK_SLEEP_PROMOTED_PRI is set */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Reacquire in the mode requested by the sleep action. */
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Did not block: still honor the unlock request. */
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1149
1150
1151 /*
1152 * Routine: lck_mtx_sleep_deadline
1153 */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 lck_mtx_t *lck,
1157 lck_sleep_action_t lck_sleep_action,
1158 event_t event,
1159 wait_interrupt_t interruptible,
1160 uint64_t deadline)
1161 {
1162 wait_result_t res;
1163 thread_pri_floor_t token;
1164
1165 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167
1168 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 panic("Invalid lock sleep action %x", lck_sleep_action);
1170 }
1171
1172 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 /*
1174 * See lck_mtx_sleep().
1175 */
1176 token = thread_priority_floor_start();
1177 }
1178
1179 res = assert_wait_deadline(event, interruptible, deadline);
1180 if (res == THREAD_WAITING) {
1181 lck_mtx_unlock(lck);
1182 res = thread_block(THREAD_CONTINUE_NULL);
1183 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 lck_mtx_lock_spin(lck);
1186 } else {
1187 lck_mtx_lock(lck);
1188 }
1189 }
1190 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 lck_mtx_unlock(lck);
1192 }
1193
1194 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 thread_priority_floor_end(&token);
1196 }
1197
1198 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199
1200 return res;
1201 }
1202
1203 /*
1204 * sleep_with_inheritor and wakeup_with_inheritor KPI
1205 *
1206 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207 * the latest thread specified as inheritor.
1208 *
 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore it is the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1212 *
1213 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214 *
 * Turnstiles require a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1219 *
 * All functions will save the turnstile associated with the event in the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without interrupts disabled.
1223 * Also:
1224 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225 * - every event will have a dedicated wait_queue.
1226 *
1227 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229 */
1230
1231
/*
 * Waiter-selection policy for wakeup_with_inheritor_and_turnstile().
 */
typedef enum {
	LCK_WAKEUP_THREAD,	/* wake a specific, caller-supplied thread */
	LCK_WAKEUP_ONE,		/* wake a single waiter */
	LCK_WAKEUP_ALL		/* wake every waiter on the event */
} lck_wakeup_type_t;
1237
/*
 * Routine:	wakeup_with_inheritor_and_turnstile
 *
 * Common wakeup path for the *_with_inheritor KPI: wakes one thread, all
 * threads, or a specific thread waiting on event, using the turnstile
 * kernel hash table bucket spinlock as the turnstile interlock.
 *
 * For LCK_WAKEUP_ONE with LCK_WAKE_DEFAULT the push is transferred to the
 * woken thread; with LCK_WAKE_DO_NOT_TRANSFER_PUSH it is not.  If
 * thread_wokenup is non-NULL for LCK_WAKEUP_ONE, it receives the thread
 * returned by waitq_wakeup64_identify (NULL when nobody was waiting).
 *
 * Returns KERN_SUCCESS when a thread was woken, KERN_NOT_WAITING otherwise.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t event,
	wait_result_t result,
	lck_wakeup_type_t wake_type,
	lck_wake_action_t action,
	thread_t *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	switch (wake_type) {
	case LCK_WAKEUP_ONE: {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		if (thread_wokenup) {
			thread_t wokeup;

			wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
			*thread_wokenup = wokeup;
			ret = wokeup ? KERN_SUCCESS : KERN_NOT_WAITING;
		} else {
			ret = waitq_wakeup64_one(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
		}
		if (ret == KERN_SUCCESS && action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
			/* Push stays with the current inheritor: skip the chain update. */
			goto complete;
		}
		if (ret == KERN_NOT_WAITING) {
			/* Nobody was waiting: clear the inheritor so no stale push remains. */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		break;
	}
	case LCK_WAKEUP_ALL: {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
		break;
	}
	case LCK_WAKEUP_THREAD: {
		assert(thread_wokenup);
		ret = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(event),
		    *thread_wokenup, result);
		break;
	}
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor is either NULL, or a thread that was just woken
	 * up and has not blocked because it is racing with the same interlock used here
	 * after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor,
	 * the old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We could anyway optimize this by asking turnstile to tell us
	 * if the old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1337
/*
 * Routine:	sleep_with_inheritor_and_turnstile
 *
 * Common sleep path for all *_sleep_with_inheritor variants: parks the
 * calling thread on the turnstile associated with event in the kernel
 * hash table, pushing on inheritor.  primitive_unlock() is called after
 * the bucket interlock is taken (the handoff from the caller's locking
 * primitive), and primitive_lock() is called before returning; callers
 * pass an empty block for LCK_SLEEP_UNLOCK semantics.
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline,
	void (^primitive_lock)(void),
	void (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Retake the bucket lock to tear down our use of the hash turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* Hand back to the caller's locking primitive before returning. */
	primitive_lock();

	return ret;
}
1394
1395 /*
1396 * change_sleep_inheritor is independent from the locking primitive.
1397 */
1398
1399 /*
1400 * Name: change_sleep_inheritor
1401 *
1402 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1403 *
1404 * Args:
1405 * Arg1: event to redirect the push.
1406 * Arg2: new inheritor for event.
1407 *
1408 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1409 *
1410 * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1411 * wakeup for the event is called.
1412 * NOTE: this cannot be called from interrupt context.
1413 */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	/* Report KERN_NOT_WAITING but still proceed to redirect the turnstile. */
	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1455
1456 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1457 lck_spin_sleep_with_inheritor(
1458 lck_spin_t *lock,
1459 lck_sleep_action_t lck_sleep_action,
1460 event_t event,
1461 thread_t inheritor,
1462 wait_interrupt_t interruptible,
1463 uint64_t deadline)
1464 {
1465 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1466 return sleep_with_inheritor_and_turnstile(event, inheritor,
1467 interruptible, deadline,
1468 ^{}, ^{ lck_spin_unlock(lock); });
1469 } else {
1470 return sleep_with_inheritor_and_turnstile(event, inheritor,
1471 interruptible, deadline,
1472 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1473 }
1474 }
1475
1476 wait_result_t
hw_lck_ticket_sleep_with_inheritor(hw_lck_ticket_t * lock,lck_grp_t * grp __unused,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1477 hw_lck_ticket_sleep_with_inheritor(
1478 hw_lck_ticket_t *lock,
1479 lck_grp_t *grp __unused,
1480 lck_sleep_action_t lck_sleep_action,
1481 event_t event,
1482 thread_t inheritor,
1483 wait_interrupt_t interruptible,
1484 uint64_t deadline)
1485 {
1486 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1487 return sleep_with_inheritor_and_turnstile(event, inheritor,
1488 interruptible, deadline,
1489 ^{}, ^{ hw_lck_ticket_unlock(lock); });
1490 } else {
1491 return sleep_with_inheritor_and_turnstile(event, inheritor,
1492 interruptible, deadline,
1493 ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
1494 }
1495 }
1496
1497 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1498 lck_ticket_sleep_with_inheritor(
1499 lck_ticket_t *lock,
1500 lck_grp_t *grp,
1501 lck_sleep_action_t lck_sleep_action,
1502 event_t event,
1503 thread_t inheritor,
1504 wait_interrupt_t interruptible,
1505 uint64_t deadline)
1506 {
1507 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1508 return sleep_with_inheritor_and_turnstile(event, inheritor,
1509 interruptible, deadline,
1510 ^{}, ^{ lck_ticket_unlock(lock); });
1511 } else {
1512 return sleep_with_inheritor_and_turnstile(event, inheritor,
1513 interruptible, deadline,
1514 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1515 }
1516 }
1517
1518 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1519 lck_mtx_sleep_with_inheritor(
1520 lck_mtx_t *lock,
1521 lck_sleep_action_t lck_sleep_action,
1522 event_t event,
1523 thread_t inheritor,
1524 wait_interrupt_t interruptible,
1525 uint64_t deadline)
1526 {
1527 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1528
1529 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1530 return sleep_with_inheritor_and_turnstile(event,
1531 inheritor,
1532 interruptible,
1533 deadline,
1534 ^{;},
1535 ^{lck_mtx_unlock(lock);});
1536 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1537 return sleep_with_inheritor_and_turnstile(event,
1538 inheritor,
1539 interruptible,
1540 deadline,
1541 ^{lck_mtx_lock_spin(lock);},
1542 ^{lck_mtx_unlock(lock);});
1543 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1544 return sleep_with_inheritor_and_turnstile(event,
1545 inheritor,
1546 interruptible,
1547 deadline,
1548 ^{lck_mtx_lock_spin_always(lock);},
1549 ^{lck_mtx_unlock(lock);});
1550 } else {
1551 return sleep_with_inheritor_and_turnstile(event,
1552 inheritor,
1553 interruptible,
1554 deadline,
1555 ^{lck_mtx_lock(lock);},
1556 ^{lck_mtx_unlock(lock);});
1557 }
1558 }
1559
1560 /*
1561 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1562 */
1563
1564 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1565 lck_rw_sleep_with_inheritor(
1566 lck_rw_t *lock,
1567 lck_sleep_action_t lck_sleep_action,
1568 event_t event,
1569 thread_t inheritor,
1570 wait_interrupt_t interruptible,
1571 uint64_t deadline)
1572 {
1573 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1574
1575 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1576
1577 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1578 return sleep_with_inheritor_and_turnstile(event,
1579 inheritor,
1580 interruptible,
1581 deadline,
1582 ^{;},
1583 ^{lck_rw_type = lck_rw_done(lock);});
1584 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1585 return sleep_with_inheritor_and_turnstile(event,
1586 inheritor,
1587 interruptible,
1588 deadline,
1589 ^{lck_rw_lock(lock, lck_rw_type);},
1590 ^{lck_rw_type = lck_rw_done(lock);});
1591 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1592 return sleep_with_inheritor_and_turnstile(event,
1593 inheritor,
1594 interruptible,
1595 deadline,
1596 ^{lck_rw_lock_exclusive(lock);},
1597 ^{lck_rw_type = lck_rw_done(lock);});
1598 } else {
1599 return sleep_with_inheritor_and_turnstile(event,
1600 inheritor,
1601 interruptible,
1602 deadline,
1603 ^{lck_rw_lock_shared(lock);},
1604 ^{lck_rw_type = lck_rw_done(lock);});
1605 }
1606 }
1607
1608 /*
1609 * wakeup_with_inheritor functions are independent from the locking primitive.
1610 */
1611
1612 kern_return_t
wakeup_thread_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t thread_towake)1613 wakeup_thread_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t thread_towake)
1614 {
1615 return wakeup_with_inheritor_and_turnstile(event,
1616 result,
1617 LCK_WAKEUP_THREAD,
1618 action,
1619 &thread_towake);
1620 }
1621
1622 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1623 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1624 {
1625 return wakeup_with_inheritor_and_turnstile(event,
1626 result,
1627 LCK_WAKEUP_ONE,
1628 action,
1629 thread_wokenup);
1630 }
1631
1632 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1633 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1634 {
1635 return wakeup_with_inheritor_and_turnstile(event,
1636 result,
1637 LCK_WAKEUP_ALL,
1638 0,
1639 NULL);
1640 }
1641
/*
 * Routine:	kdp_sleep_with_inheritor_find_owner
 *
 * Debugger (kdp) callback: report the inheritor of a sleep_with_inheritor
 * turnstile as the waitinfo "owner" (e.g. for stackshot).  Runs in
 * debugger context, so it only reads state and never takes locks.
 */
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	waitinfo->owner = 0;
	waitinfo->context = 0;

	/*
	 * If the waitq interlock is held, the turnstile state may be mid-update;
	 * leave the owner as 0 rather than reading possibly inconsistent data.
	 */
	if (waitq_held(waitq)) {
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1658
/* Layout invariants the cond_sleep_with_inheritor* code relies on. */
static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1662
/*
 * Routine:	cond_sleep_with_inheritor_and_turnstile_type
 *
 * Common conditional-sleep path: under the turnstile hash bucket lock,
 * evaluate cond_sleep_check; if it fails, return THREAD_NOT_WAITING
 * without sleeping.  Otherwise sleep on the turnstile for cond, pushing
 * on the thread identified by the ctid the check observed.
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Retake the bucket lock to tear down our use of the hash turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1729
1730 /*
1731 * Name: cond_sleep_with_inheritor32_mask
1732 *
1733 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1734 * Allows a thread to conditionally sleep while indicating which thread should
1735 * inherit the priority push associated with the condition.
1736 * The condition should be expressed through a cond_swi_var32_s pointer.
1737 * The condition needs to be populated by the caller with the ctid of the
1738 * thread that should inherit the push. The remaining bits of the condition
1739 * can be used by the caller to implement its own synchronization logic.
1740 * A copy of the condition value observed by the caller when it decided to call
1741 * this function should be provided to prevent races with matching wakeups.
1742 * This function will atomically check the value stored in the condition against
1743 * the expected/observed one provided only for the bits that are set in the mask.
1744 * If the check doesn't pass the thread will not sleep and the function will return.
1745 * The ctid provided in the condition will be used only after a successful
1746 * check.
1747 *
1748 * Args:
1749 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1750 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1751 * Arg3: mask to apply to the condition to check.
1752 * Arg4: interruptible flag for wait.
1753 * Arg5: deadline for wait.
1754 *
1755 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1756 * wakeup for the cond is called.
1757 *
1758 * Returns: result of the wait.
1759 */
1760 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1761 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1762 {
1763 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1764 cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1765 bool ret;
1766 if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1767 ret = true;
1768 *ctid = cond_val.cond32_owner;
1769 } else {
1770 ret = false;
1771 }
1772 return ret;
1773 };
1774
1775 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1776 }
1777
1778 /*
1779 * Name: cond_sleep_with_inheritor64_mask
1780 *
1781 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1782 * Allows a thread to conditionally sleep while indicating which thread should
1783 * inherit the priority push associated with the condition.
1784 * The condition should be expressed through a cond_swi_var64_s pointer.
1785 * The condition needs to be populated by the caller with the ctid of the
1786 * thread that should inherit the push. The remaining bits of the condition
1787 * can be used by the caller to implement its own synchronization logic.
1788 * A copy of the condition value observed by the caller when it decided to call
1789 * this function should be provided to prevent races with matching wakeups.
1790 * This function will atomically check the value stored in the condition against
1791 * the expected/observed one provided only for the bits that are set in the mask.
1792 * If the check doesn't pass the thread will not sleep and the function will return.
1793 * The ctid provided in the condition will be used only after a successful
1794 * check.
1795 *
1796 * Args:
1797 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1798 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1799 * Arg3: mask to apply to the condition to check.
1800 * Arg4: interruptible flag for wait.
1801 * Arg5: deadline for wait.
1802 *
1803 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1804 * wakeup for the cond is called.
1805 *
1806 * Returns: result of the wait.
1807 */
1808 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1809 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1810 {
1811 bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1812 cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1813 bool ret;
1814 if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1815 ret = true;
1816 *ctid = cond_val.cond64_owner;
1817 } else {
1818 ret = false;
1819 }
1820 return ret;
1821 };
1822
1823 return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1824 }
1825
1826 /*
1827 * Name: cond_sleep_with_inheritor32
1828 *
1829 * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1830 * Allows a thread to conditionally sleep while indicating which thread should
1831 * inherit the priority push associated with the condition.
1832 * The condition should be expressed through a cond_swi_var32_s pointer.
1833 * The condition needs to be populated by the caller with the ctid of the
1834 * thread that should inherit the push. The remaining bits of the condition
1835 * can be used by the caller to implement its own synchronization logic.
1836 * A copy of the condition value observed by the caller when it decided to call
1837 * this function should be provided to prevent races with matching wakeups.
1838 * This function will atomically check the value stored in the condition against
1839 * the expected/observed one provided. If the check doesn't pass the thread will not
1840 * sleep and the function will return.
1841 * The ctid provided in the condition will be used only after a successful
1842 * check.
1843 *
1844 * Args:
1845 * Arg1: cond_swi_var32_s pointer that stores the condition to check.
1846 * Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1847 * Arg3: interruptible flag for wait.
1848 * Arg4: deadline for wait.
1849 *
1850 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1851 * wakeup for the cond is called.
1852 *
1853 * Returns: result of the wait.
1854 */
wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* Full-width comparison: all 32 bits of the condition must match. */
	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
}
1860
1861 /*
1862 * Name: cond_sleep_with_inheritor64
1863 *
1864 * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1865 * Allows a thread to conditionally sleep while indicating which thread should
1866 * inherit the priority push associated with the condition.
1867 * The condition should be expressed through a cond_swi_var64_s pointer.
1868 * The condition needs to be populated by the caller with the ctid of the
1869 * thread that should inherit the push. The remaining bits of the condition
1870 * can be used by the caller to implement its own synchronization logic.
1871 * A copy of the condition value observed by the caller when it decided to call
1872 * this function should be provided to prevent races with matching wakeups.
1873 * This function will atomically check the value stored in the condition against
1874 * the expected/observed one provided. If the check doesn't pass the thread will not
1875 * sleep and the function will return.
1876 * The ctid provided in the condition will be used only after a successful
1877 * check.
1878 *
1879 * Args:
1880 * Arg1: cond_swi_var64_s pointer that stores the condition to check.
1881 * Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1882 * Arg3: interruptible flag for wait.
1883 * Arg4: deadline for wait.
1884 *
1885 * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1886 * wakeup for the cond is called.
1887 *
1888 * Returns: result of the wait.
1889 */
1890 wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond,cond_swi_var64_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1891 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1892 {
1893 return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1894 }
1895
1896 /*
1897 * Name: cond_wakeup_one_with_inheritor
1898 *
1899 * Description: Wake up one waiter waiting on the condition (if any).
1900 * The thread woken up will be the one with the higher sched priority waiting on the condition.
1901 * The push for the condition will be transferred from the last inheritor to the woken up thread.
1902 *
1903 * Args:
1904 * Arg1: condition to wake from.
1905 * Arg2: wait result to pass to the woken up thread.
1906 * Arg3: pointer for storing the thread wokenup.
1907 *
1908 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1909 *
1910 * Conditions: The new inheritor wokenup cannot return to user space or exit until another inheritor is specified for the
1911 * condition or a wakeup for the event is called.
1912 * A reference for the wokenup thread is acquired.
1913 * NOTE: this cannot be called from interrupt context.
1914 */
1915 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1916 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1917 {
1918 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1919 result,
1920 LCK_WAKEUP_ONE,
1921 action,
1922 thread_wokenup);
1923 }
1924
1925 /*
1926 * Name: cond_wakeup_all_with_inheritor
1927 *
1928 * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1929 *
1930 * Args:
1931 * Arg1: condition to wake from.
1932 * Arg2: wait result to pass to the woken up threads.
1933 *
1934 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1935 *
1936 * Conditions: NOTE: this cannot be called from interrupt context.
1937 */
1938 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1939 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1940 {
1941 return wakeup_with_inheritor_and_turnstile((event_t)cond,
1942 result,
1943 LCK_WAKEUP_ALL,
1944 0,
1945 NULL);
1946 }
1947
1948
1949 #pragma mark - gates
1950
/*
 * A gate's gt_data word packs three things:
 *   bit 0 (GATE_ILOCK)   - interlock bit, taken/released via hw_lock_bit()
 *   bit 1 (GATE_WAITERS) - set when threads are parked on the gate turnstile
 *   remaining bits       - thread_t pointer of the current holder (thread
 *                          pointers are aligned, leaving the two low bits free)
 */
/* Tag stored in gt_type by gate_init(); checked by gate_verify(). */
#define GATE_TYPE 3
#define GATE_ILOCK_BIT 0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Interlock: spin on bit 0 of gt_data itself. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* Strip the two flag bits to recover the holder thread pointer. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel stored in gt_data by gate_destroy_internal(). */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate) ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Type of the deferred turnstile_cleanup callback returned by the
 * gate_*_turnstile helpers: must be invoked after dropping the interlock. */
typedef void (*void_func_void)(void);
1975
/* Panic: gt_type did not match GATE_TYPE (uninitialized or corrupt gate). */
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1982
/* Panic: the gate carries the GATE_DESTROYED sentinel (use after destroy). */
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1989
/*
 * Sanity-check a gate before use: correct type tag, not destroyed,
 * and at least one outstanding reference. Panics on violation.
 */
static void
gate_verify(gate_t *gate)
{
	/* gt_type is set to GATE_TYPE by gate_init(); anything else means
	 * this memory was never initialized as a gate. */
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	/* gate_destroy_internal() stores GATE_DESTROYED into gt_data. */
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	assert(gate->gt_refs > 0);
}
2002
/* Panic: attempted to close a gate that already has a holder. */
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
2009
2010 static kern_return_t
gate_try_close(gate_t * gate)2011 gate_try_close(gate_t *gate)
2012 {
2013 uintptr_t state;
2014 thread_t holder;
2015 kern_return_t ret;
2016 thread_t thread = current_thread();
2017
2018 gate_verify(gate);
2019
2020 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2021 return KERN_SUCCESS;
2022 }
2023
2024 gate_ilock(gate);
2025 state = ordered_load_gate(gate);
2026 holder = GATE_STATE_TO_THREAD(state);
2027
2028 if (holder == NULL) {
2029 assert(gate_has_waiter_bit(state) == FALSE);
2030
2031 state = GATE_THREAD_TO_STATE(current_thread());
2032 state |= GATE_ILOCK;
2033 ordered_store_gate(gate, state);
2034 ret = KERN_SUCCESS;
2035 } else {
2036 if (holder == current_thread()) {
2037 gate_already_owned_panic(gate, holder);
2038 }
2039 ret = KERN_FAILURE;
2040 }
2041
2042 gate_iunlock(gate);
2043 return ret;
2044 }
2045
/*
 * Close the gate unconditionally, making the current thread the holder.
 * Panics if the gate is already closed (by anyone, including self).
 *
 * Fast path: cmpxchg from the fully-open state (0) straight to
 * "held by this thread". Slow path: take the interlock and publish.
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/* The gate must be open here: any holder is a caller error. */
	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* An open gate cannot have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Publish the new holder, keeping the interlock bit set until
	 * gate_iunlock() below clears it. */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2075
/*
 * Wake up every thread parked on the gate's turnstile and leave the
 * turnstile with a NULL inheritor. Called from gate_open() with the
 * gate interlock held, only when the waiters bit is set.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2096
2097 __abortlike
2098 static void
gate_not_owned_panic(gate_t * gate,thread_t holder,bool open)2099 gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
2100 {
2101 if (open) {
2102 panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2103 } else {
2104 panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2105 }
2106 }
2107
/*
 * Open the gate, waking up any waiters. The current thread must be
 * the holder (panics otherwise).
 *
 * Fast path: cmpxchg from "held by this thread, no waiters" to fully
 * open (0). Slow path: take the interlock, wake waiters through the
 * turnstile, then clear the holder.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiter bits; only the interlock bit survives
	 * until gate_iunlock() releases it. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2139
/*
 * Turnstile half of gate_handoff(): wake the highest-priority waiter
 * and make it the new inheritor. Called with the gate interlock held.
 *
 * On KERN_SUCCESS, *thread_woken_up holds a referenced thread (the new
 * holder) and *waiters reports whether other waiters remain.
 * Returns KERN_NOT_WAITING if no waiter was found on the waitq.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2203
2204 static kern_return_t
gate_handoff(gate_t * gate,int flags)2205 gate_handoff(gate_t *gate,
2206 int flags)
2207 {
2208 kern_return_t ret;
2209 thread_t new_holder = NULL;
2210 uintptr_t state;
2211 thread_t holder;
2212 bool waiters;
2213 thread_t thread = current_thread();
2214
2215 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2216 gate_verify(gate);
2217
2218 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2219 if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2220 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2221 return KERN_NOT_WAITING;
2222 }
2223 }
2224
2225 gate_ilock(gate);
2226 state = ordered_load_gate(gate);
2227 holder = GATE_STATE_TO_THREAD(state);
2228 waiters = gate_has_waiter_bit(state);
2229
2230 if (holder != current_thread()) {
2231 gate_not_owned_panic(gate, holder, false);
2232 }
2233
2234 if (waiters) {
2235 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2236 if (ret == KERN_SUCCESS) {
2237 state = GATE_THREAD_TO_STATE(new_holder);
2238 if (waiters) {
2239 state |= GATE_WAITERS;
2240 }
2241 } else {
2242 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2243 state = 0;
2244 }
2245 }
2246 } else {
2247 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2248 state = 0;
2249 }
2250 ret = KERN_NOT_WAITING;
2251 }
2252 state |= GATE_ILOCK;
2253 ordered_store_gate(gate, state);
2254
2255 gate_iunlock(gate);
2256
2257 if (new_holder) {
2258 thread_deallocate(new_holder);
2259 }
2260 return ret;
2261 }
2262
/*
 * Turnstile half of gate_steal(): point the gate's turnstile
 * inheritor at the stealing thread. Called with the gate interlock
 * held; returns the cleanup function the caller must invoke after
 * dropping the interlock.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2281
2282 __abortlike
2283 static void
gate_not_closed_panic(gate_t * gate,bool wait)2284 gate_not_closed_panic(gate_t *gate, bool wait)
2285 {
2286 if (wait) {
2287 panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
2288 } else {
2289 panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
2290 }
2291 }
2292
/*
 * Forcibly take ownership of a closed gate, replacing the current
 * holder. Panics if the gate is open. If there are waiters, the
 * turnstile inheritor is moved to the stealing thread; the returned
 * cleanup callback runs after the interlock is dropped.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	/* Stealing only makes sense on a closed gate. */
	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep the waiter bit and redirect the push to us. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		/* Turnstile cleanup must run without the interlock held. */
		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2327
/*
 * Turnstile half of gate_wait(): park the calling thread on the
 * gate's turnstile with `holder` as inheritor.
 *
 * Entered with the gate interlock held; the interlock is dropped
 * across the block and re-taken before returning. *wait receives the
 * thread_block() result and *waiters whether the turnstile still has
 * waiters. Returns the cleanup function the caller must invoke after
 * dropping the interlock.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) || // thread interrupted or timedout
		    holder == current_thread())) { // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2381
/* Release a gate allocated by gate_alloc_init() back to its zone. */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2387
/* Panic: gt_refs would overflow its 16-bit counter (see gate_wait()). */
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2394
2395 static gate_wait_result_t
2396 gate_wait(gate_t* gate,
2397 wait_interrupt_t interruptible,
2398 uint64_t deadline,
2399 void (^primitive_unlock)(void),
2400 void (^primitive_lock)(void))
2401 {
2402 gate_wait_result_t ret;
2403 void_func_void func_after_interlock_unlock;
2404 wait_result_t wait_result;
2405 uintptr_t state;
2406 thread_t holder;
2407 bool waiters;
2408
2409 gate_verify(gate);
2410
2411 gate_ilock(gate);
2412 state = ordered_load_gate(gate);
2413 holder = GATE_STATE_TO_THREAD(state);
2414
2415 if (holder == NULL) {
2416 gate_not_closed_panic(gate, true);
2417 }
2418
2419 /*
2420 * Get a ref on the gate so it will not
2421 * be freed while we are coming back from the sleep.
2422 */
2423 if (gate->gt_refs == UINT16_MAX) {
2424 gate_too_many_refs_panic(gate);
2425 }
2426 gate->gt_refs++;
2427 state |= GATE_WAITERS;
2428 ordered_store_gate(gate, state);
2429
2430 /*
2431 * Release the primitive lock before any
2432 * turnstile operation. Turnstile
2433 * does not support a blocking primitive as
2434 * interlock.
2435 *
2436 * In this way, concurrent threads will be
2437 * able to acquire the primitive lock
2438 * but still will wait for me through the
2439 * gate interlock.
2440 */
2441 primitive_unlock();
2442
2443 func_after_interlock_unlock = gate_wait_turnstile( gate,
2444 interruptible,
2445 deadline,
2446 holder,
2447 &wait_result,
2448 &waiters);
2449
2450 state = ordered_load_gate(gate);
2451 holder = GATE_STATE_TO_THREAD(state);
2452
2453 switch (wait_result) {
2454 case THREAD_INTERRUPTED:
2455 case THREAD_TIMED_OUT:
2456 assert(holder != current_thread());
2457
2458 if (waiters) {
2459 state |= GATE_WAITERS;
2460 } else {
2461 state &= ~GATE_WAITERS;
2462 }
2463 ordered_store_gate(gate, state);
2464
2465 if (wait_result == THREAD_INTERRUPTED) {
2466 ret = GATE_INTERRUPTED;
2467 } else {
2468 ret = GATE_TIMED_OUT;
2469 }
2470 break;
2471 default:
2472 /*
2473 * Note it is possible that even if the gate was handed off to
2474 * me, someone called gate_steal() before I woke up.
2475 *
2476 * As well as it is possible that the gate was opened, but someone
2477 * closed it while I was waking up.
2478 *
2479 * In both cases we return GATE_OPENED, as the gate was opened to me
2480 * at one point, it is the caller responsibility to check again if
2481 * the gate is open.
2482 */
2483 if (holder == current_thread()) {
2484 ret = GATE_HANDOFF;
2485 } else {
2486 ret = GATE_OPENED;
2487 }
2488 break;
2489 }
2490
2491 assert(gate->gt_refs > 0);
2492 uint32_t ref = --gate->gt_refs;
2493 bool to_free = gate->gt_alloc;
2494 gate_iunlock(gate);
2495
2496 if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
2497 if (to_free == true) {
2498 assert(!waiters);
2499 if (ref == 0) {
2500 gate_free_internal(gate);
2501 }
2502 ret = GATE_OPENED;
2503 } else {
2504 gate_verify_destroy_panic(gate);
2505 }
2506 }
2507
2508 /*
2509 * turnstile func that needs to be executed without
2510 * holding the primitive interlock
2511 */
2512 func_after_interlock_unlock();
2513
2514 primitive_lock();
2515
2516 return ret;
2517 }
2518
/*
 * Assert a property of the gate under its interlock:
 * GATE_ASSERT_CLOSED - some thread holds it,
 * GATE_ASSERT_OPEN   - no thread holds it,
 * GATE_ASSERT_HELD   - the current thread holds it.
 * Panics on an unknown flag.
 */
static void
gate_assert(gate_t *gate, int flags)
{
	uintptr_t state;
	thread_t holder;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (flags) {
	case GATE_ASSERT_CLOSED:
		assert(holder != NULL);
		break;
	case GATE_ASSERT_OPEN:
		assert(holder == NULL);
		break;
	case GATE_ASSERT_HELD:
		assert(holder == current_thread());
		break;
	default:
		panic("invalid %s flag %d", __func__, flags);
	}

	gate_iunlock(gate);
}
2547
/* gate_init() variants: embedded gate vs one from gate_alloc_init(). */
enum {
	GT_INIT_DEFAULT = 0, /* gate lives in caller-owned storage */
	GT_INIT_ALLOC        /* gate was zalloc'ed; freed via gate_free() */
};
2552
2553 static void
gate_init(gate_t * gate,uint type)2554 gate_init(gate_t *gate, uint type)
2555 {
2556 bzero(gate, sizeof(gate_t));
2557
2558 gate->gt_data = 0;
2559 gate->gt_turnstile = NULL;
2560 gate->gt_refs = 1;
2561 switch (type) {
2562 case GT_INIT_ALLOC:
2563 gate->gt_alloc = 1;
2564 break;
2565 default:
2566 gate->gt_alloc = 0;
2567 break;
2568 }
2569 gate->gt_type = GATE_TYPE;
2570 gate->gt_flags_pad = 0;
2571 }
2572
2573 static gate_t*
gate_alloc_init(void)2574 gate_alloc_init(void)
2575 {
2576 gate_t *gate;
2577 gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2578 gate_init(gate, GT_INIT_ALLOC);
2579 return gate;
2580 }
2581
/* Panic: destroying a gate that still has a holder. */
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2588
/* Panic: destroying a gate whose waiter bit is still set. */
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2595
/*
 * Mark a gate destroyed and drop the caller's reference.
 * The gate must be open with no waiters (panics otherwise).
 * Returns the remaining reference count: non-zero means woken-up
 * waiters still need to touch the gate (see gate_free()).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still needs
	 * to be available to let the
	 * last wokenup threads clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2636
/* Panic: gate_destroy() called on a heap-allocated gate. */
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2643
2644 static void
gate_destroy(gate_t * gate)2645 gate_destroy(gate_t *gate)
2646 {
2647 gate_verify(gate);
2648 if (gate->gt_alloc == 1) {
2649 gate_destroy_panic(gate);
2650 }
2651 gate_destroy_internal(gate);
2652 }
2653
/* Panic: gate_free() called on a gate not from gate_alloc_init(). */
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2660
2661 static void
gate_free(gate_t * gate)2662 gate_free(gate_t *gate)
2663 {
2664 uint16_t ref;
2665
2666 gate_verify(gate);
2667
2668 if (gate->gt_alloc == 0) {
2669 gate_free_panic(gate);
2670 }
2671
2672 ref = gate_destroy_internal(gate);
2673 /*
2674 * Some of the threads waiting on the gate
2675 * might still need to run after being woken up.
2676 * They will access the gate to cleanup the
2677 * state, so we cannot free it.
2678 * The last waiter will free the gate in this case.
2679 */
2680 if (ref == 0) {
2681 gate_free_internal(gate);
2682 }
2683 }
2684
2685 /*
2686 * Name: lck_rw_gate_init
2687 *
2688 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2689 *
2690 * Args:
2691 * Arg1: lck_rw_t lock used to protect the gate.
2692 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2693 */
2694 void
lck_rw_gate_init(lck_rw_t * lock,gate_t * gate)2695 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2696 {
2697 (void) lock;
2698 gate_init(gate, GT_INIT_DEFAULT);
2699 }
2700
2701 /*
2702 * Name: lck_rw_gate_alloc_init
2703 *
2704 * Description: allocates and initializes a gate_t.
2705 *
2706 * Args:
2707 * Arg1: lck_rw_t lock used to protect the gate.
2708 *
2709 * Returns:
2710 * gate_t allocated.
2711 */
2712 gate_t*
lck_rw_gate_alloc_init(lck_rw_t * lock)2713 lck_rw_gate_alloc_init(lck_rw_t *lock)
2714 {
2715 (void) lock;
2716 return gate_alloc_init();
2717 }
2718
2719 /*
2720 * Name: lck_rw_gate_destroy
2721 *
2722 * Description: destroys a variable previously initialized
2723 * with lck_rw_gate_init().
2724 *
2725 * Args:
2726 * Arg1: lck_rw_t lock used to protect the gate.
2727 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2728 */
2729 void
lck_rw_gate_destroy(lck_rw_t * lock,gate_t * gate)2730 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2731 {
2732 (void) lock;
2733 gate_destroy(gate);
2734 }
2735
2736 /*
2737 * Name: lck_rw_gate_free
2738 *
2739 * Description: destroys and tries to free a gate previously allocated
2740 * with lck_rw_gate_alloc_init().
2741 * The gate free might be delegated to the last thread returning
2742 * from the gate_wait().
2743 *
2744 * Args:
2745 * Arg1: lck_rw_t lock used to protect the gate.
2746 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2747 */
2748 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2749 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2750 {
2751 (void) lock;
2752 gate_free(gate);
2753 }
2754
2755 /*
2756 * Name: lck_rw_gate_try_close
2757 *
2758 * Description: Tries to close the gate.
2759 * In case of success the current thread will be set as
2760 * the holder of the gate.
2761 *
2762 * Args:
2763 * Arg1: lck_rw_t lock used to protect the gate.
2764 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2765 *
2766 * Conditions: Lock must be held. Returns with the lock held.
2767 *
2768 * Returns:
2769 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2770 * of the gate.
2771 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2772 * to wake up possible waiters on the gate before returning to userspace.
2773 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2774 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2775 *
2776 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2777 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2778 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2779 * be done without dropping the lock that is protecting the gate in between.
2780 */
2781 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2782 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2783 {
2784 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2785
2786 return gate_try_close(gate);
2787 }
2788
2789 /*
2790 * Name: lck_rw_gate_close
2791 *
2792 * Description: Closes the gate. The current thread will be set as
2793 * the holder of the gate. Will panic if the gate is already closed.
2794 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2795 * to wake up possible waiters on the gate before returning to userspace.
2796 *
2797 * Args:
2798 * Arg1: lck_rw_t lock used to protect the gate.
2799 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2800 *
2801 * Conditions: Lock must be held. Returns with the lock held.
2802 * The gate must be open.
2803 *
2804 */
2805 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2806 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2807 {
2808 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2809
2810 return gate_close(gate);
2811 }
2812
2813 /*
2814 * Name: lck_rw_gate_open
2815 *
2816 * Description: Opens the gate and wakes up possible waiters.
2817 *
2818 * Args:
2819 * Arg1: lck_rw_t lock used to protect the gate.
2820 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2821 *
2822 * Conditions: Lock must be held. Returns with the lock held.
2823 * The current thread must be the holder of the gate.
2824 *
2825 */
/* Open the gate and wake waiters; caller must hold the rw lock and
 * be the gate holder (see contract comment above). */
void
lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_open(gate);
}
2833
2834 /*
2835 * Name: lck_rw_gate_handoff
2836 *
2837 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2838 * priority will be selected as the new holder of the gate, and woken up,
2839 * with the gate remaining in the closed state throughout.
2840 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2841 * will be returned.
2842 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2843 * case no waiters were found.
2844 *
2845 *
2846 * Args:
2847 * Arg1: lck_rw_t lock used to protect the gate.
2848 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2849 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2850 *
2851 * Conditions: Lock must be held. Returns with the lock held.
2852 * The current thread must be the holder of the gate.
2853 *
2854 * Returns:
2855 * KERN_SUCCESS in case one of the waiters became the new holder.
2856 * KERN_NOT_WAITING in case there were no waiters.
2857 *
2858 */
2859 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2860 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2861 {
2862 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2863
2864 return gate_handoff(gate, flags);
2865 }
2866
2867 /*
2868 * Name: lck_rw_gate_steal
2869 *
2870 * Description: Set the current ownership of the gate. It sets the current thread as the
2871 * new holder of the gate.
2872 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2873 * to wake up possible waiters on the gate before returning to userspace.
2874 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2875 * anymore.
2876 *
2877 *
2878 * Args:
2879 * Arg1: lck_rw_t lock used to protect the gate.
2880 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2881 *
2882 * Conditions: Lock must be held. Returns with the lock held.
2883 * The gate must be closed and the current thread must not already be the holder.
2884 *
2885 */
2886 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2887 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2888 {
2889 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2890
2891 gate_steal(gate);
2892 }
2893
2894 /*
2895 * Name: lck_rw_gate_wait
2896 *
2897 * Description: Waits for the current thread to become the holder of the gate or for the
2898 * gate to become open. An interruptible mode and deadline can be specified
2899 * to return earlier from the wait.
2900 *
2901 * Args:
2902 * Arg1: lck_rw_t lock used to protect the gate.
2903 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
2907 *
2908 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2909 * Lock will be dropped while waiting.
2910 * The gate must be closed.
2911 *
2912 * Returns: Reason why the thread was woken up.
2913 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 * to wake up possible waiters on the gate before returning to userspace.
2916 * GATE_OPENED - the gate was opened by the holder.
2917 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2918 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2919 */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * The rw lock is always dropped while waiting (first block passed to
	 * gate_wait()); the sleep action only selects how — or whether — it
	 * is re-acquired after wakeup (second block).
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Drop the lock before sleeping; return with it unlocked. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/*
		 * Default: lck_rw_done() reports the mode the lock was held in,
		 * and the wakeup block re-acquires it in that same mode.
		 */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		/* Re-acquire exclusive regardless of the mode held before. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		/* LCK_SLEEP_SHARED: re-acquire shared after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2953
2954 /*
2955 * Name: lck_rw_gate_assert
2956 *
2957 * Description: asserts that the gate is in the specified state.
2958 *
2959 * Args:
2960 * Arg1: lck_rw_t lock used to protect the gate.
2961 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 * Arg3: flags to specify the assert type.
2963 * GATE_ASSERT_CLOSED - the gate is currently closed
2964 * GATE_ASSERT_OPEN - the gate is currently opened
2965 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2966 */
2967 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2968 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2969 {
2970 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2971
2972 gate_assert(gate, flags);
2973 return;
2974 }
2975
2976 /*
2977 * Name: lck_mtx_gate_init
2978 *
2979 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2980 *
2981 * Args:
2982 * Arg1: lck_mtx_t lock used to protect the gate.
2983 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2984 */
2985 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2986 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2987 {
2988 (void) lock;
2989 gate_init(gate, GT_INIT_DEFAULT);
2990 }
2991
2992 /*
2993 * Name: lck_mtx_gate_alloc_init
2994 *
2995 * Description: allocates and initializes a gate_t.
2996 *
2997 * Args:
2998 * Arg1: lck_mtx_t lock used to protect the gate.
2999 *
3000 * Returns:
3001 * gate_t allocated.
3002 */
3003 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)3004 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3005 {
3006 (void) lock;
3007 return gate_alloc_init();
3008 }
3009
3010 /*
3011 * Name: lck_mtx_gate_destroy
3012 *
3013 * Description: destroys a variable previously initialized
3014 * with lck_mtx_gate_init().
3015 *
3016 * Args:
3017 * Arg1: lck_mtx_t lock used to protect the gate.
3018 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3019 */
3020 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)3021 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3022 {
3023 (void) lock;
3024 gate_destroy(gate);
3025 }
3026
3027 /*
3028 * Name: lck_mtx_gate_free
3029 *
3030 * Description: destroys and tries to free a gate previously allocated
3031 * with lck_mtx_gate_alloc_init().
3032 * The gate free might be delegated to the last thread returning
3033 * from the gate_wait().
3034 *
3035 * Args:
3036 * Arg1: lck_mtx_t lock used to protect the gate.
 * Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3038 */
3039 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3040 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3041 {
3042 (void) lock;
3043 gate_free(gate);
3044 }
3045
3046 /*
3047 * Name: lck_mtx_gate_try_close
3048 *
3049 * Description: Tries to close the gate.
3050 * In case of success the current thread will be set as
3051 * the holder of the gate.
3052 *
3053 * Args:
3054 * Arg1: lck_mtx_t lock used to protect the gate.
3055 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3056 *
3057 * Conditions: Lock must be held. Returns with the lock held.
3058 *
3059 * Returns:
3060 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3061 * of the gate.
3062 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3063 * to wake up possible waiters on the gate before returning to userspace.
3064 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3065 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3066 *
3067 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3068 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3069 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3070 * be done without dropping the lock that is protecting the gate in between.
3071 */
3072 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3073 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3074 {
3075 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3076
3077 return gate_try_close(gate);
3078 }
3079
3080 /*
3081 * Name: lck_mtx_gate_close
3082 *
3083 * Description: Closes the gate. The current thread will be set as
3084 * the holder of the gate. Will panic if the gate is already closed.
3085 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3086 * to wake up possible waiters on the gate before returning to userspace.
3087 *
3088 * Args:
3089 * Arg1: lck_mtx_t lock used to protect the gate.
3090 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3091 *
3092 * Conditions: Lock must be held. Returns with the lock held.
3093 * The gate must be open.
3094 *
3095 */
3096 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3097 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3098 {
3099 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3100
3101 return gate_close(gate);
3102 }
3103
3104 /*
3105 * Name: lck_mtx_gate_open
3106 *
 * Description: Opens the gate and wakes up possible waiters.
3108 *
3109 * Args:
3110 * Arg1: lck_mtx_t lock used to protect the gate.
3111 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3112 *
3113 * Conditions: Lock must be held. Returns with the lock held.
3114 * The current thread must be the holder of the gate.
3115 *
3116 */
3117 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3118 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3119 {
3120 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3121
3122 gate_open(gate);
3123 }
3124
3125 /*
3126 * Name: lck_mtx_gate_handoff
3127 *
3128 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3129 * priority will be selected as the new holder of the gate, and woken up,
3130 * with the gate remaining in the closed state throughout.
3131 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3132 * will be returned.
3133 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3134 * case no waiters were found.
3135 *
3136 *
3137 * Args:
3138 * Arg1: lck_mtx_t lock used to protect the gate.
3139 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3140 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3141 *
3142 * Conditions: Lock must be held. Returns with the lock held.
3143 * The current thread must be the holder of the gate.
3144 *
3145 * Returns:
3146 * KERN_SUCCESS in case one of the waiters became the new holder.
3147 * KERN_NOT_WAITING in case there were no waiters.
3148 *
3149 */
3150 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3151 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3152 {
3153 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3154
3155 return gate_handoff(gate, flags);
3156 }
3157
3158 /*
3159 * Name: lck_mtx_gate_steal
3160 *
3161 * Description: Steals the ownership of the gate. It sets the current thread as the
3162 * new holder of the gate.
3163 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3164 * to wake up possible waiters on the gate before returning to userspace.
3165 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3166 * anymore.
3167 *
3168 *
3169 * Args:
3170 * Arg1: lck_mtx_t lock used to protect the gate.
3171 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3172 *
3173 * Conditions: Lock must be held. Returns with the lock held.
3174 * The gate must be closed and the current thread must not already be the holder.
3175 *
3176 */
3177 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3178 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3179 {
3180 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3181
3182 gate_steal(gate);
3183 }
3184
3185 /*
3186 * Name: lck_mtx_gate_wait
3187 *
3188 * Description: Waits for the current thread to become the holder of the gate or for the
3189 * gate to become open. An interruptible mode and deadline can be specified
3190 * to return earlier from the wait.
3191 *
3192 * Args:
3193 * Arg1: lck_mtx_t lock used to protect the gate.
3194 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 * Arg4: interruptible flag for wait.
 * Arg5: deadline
3198 *
3199 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3200 * Lock will be dropped while waiting.
3201 * The gate must be closed.
3202 *
3203 * Returns: Reason why the thread was woken up.
3204 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3205 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3206 * to wake up possible waiters on the gate before returning to userspace.
3207 * GATE_OPENED - the gate was opened by the holder.
3208 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3209 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3210 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * The mutex is always dropped while waiting (first block passed to
	 * gate_wait()); the sleep action only selects how — or whether — it
	 * is re-acquired after wakeup (second block).
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Drop the mutex before sleeping; return with it unlocked. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		/* Re-acquire as a spin-mode mutex after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		/* Re-acquire in always-spin mode after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		/* Default: re-acquire as a regular (blocking) mutex. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3242
3243 /*
3244 * Name: lck_mtx_gate_assert
3245 *
3246 * Description: asserts that the gate is in the specified state.
3247 *
3248 * Args:
3249 * Arg1: lck_mtx_t lock used to protect the gate.
3250 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 * Arg3: flags to specify the assert type.
3252 * GATE_ASSERT_CLOSED - the gate is currently closed
3253 * GATE_ASSERT_OPEN - the gate is currently opened
3254 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3255 */
3256 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3257 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3258 {
3259 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3260
3261 gate_assert(gate, flags);
3262 }
3263
3264 #pragma mark - LCK_*_DECLARE support
3265
/* Startup hook: initializes a spin lock declared via LCK_SPIN_DECLARE from its spec. */
__startup_func
void
lck_spin_startup_init(struct lck_spin_startup_spec *sp)
{
	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3272
/* Startup hook: initializes a mutex declared via LCK_MTX_DECLARE from its spec. */
__startup_func
void
lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
{
	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3279
/* Startup hook: initializes an rw lock declared via LCK_RW_DECLARE from its spec. */
__startup_func
void
lck_rw_startup_init(struct lck_rw_startup_spec *sp)
{
	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3286
/* Startup hook: initializes a statically-declared usimple lock from its spec. */
__startup_func
void
usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
{
	simple_lock_init(sp->lck, sp->lck_init_arg);
}
3293
/* Startup hook: initializes a ticket lock declared via LCK_TICKET_DECLARE from its spec. */
__startup_func
void
lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
{
	lck_ticket_init(sp->lck, sp->lck_grp);
}
3300