xref: /xnu-10002.81.5/osfmk/kern/locks.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #define LOCK_PRIVATE 1
58 
59 #include <mach_ldebug.h>
60 #include <debug.h>
61 
62 #include <mach/kern_return.h>
63 
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82 
83 #include <sys/kdebug.h>
84 
/* kdebug trace codes for mutex sleep/wait/wakeup events */
#define LCK_MTX_SLEEP_CODE              0
#define LCK_MTX_SLEEP_DEADLINE_CODE     1
#define LCK_MTX_LCK_WAIT_CODE           2
#define LCK_MTX_UNLCK_WAKEUP_CODE       3

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

#if MACH_LDEBUG
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p, t) do{}while(0)
#endif

#define NOINLINE                __attribute__((noinline))

/* compiler-ordered (not hardware-ordered) accessors for hw_lock_t state */
#define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)

KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

/* per-CPU scratch record filled in when a spinlock timeout is hit (panic diagnostics) */
struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

/* when cleared (boot-arg), spinlock timeouts keep spinning instead of panicking */
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);

#if CONFIG_PV_TICKET
SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
#endif

#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", LCK_OPTION_ENABLE_DEBUG);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

#if CONFIG_DTRACE
/* minimum spin duration before a dtrace spin probe fires */
#if defined (__x86_64__)
machine_timeout_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm64__)
MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif

struct lck_mcs PERCPU_DATA(lck_mcs);
134 
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 	if (__improbable(kdebug_enable)) {
140 		return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 	} else {
142 		return 0;
143 	}
144 }
145 
/*
 * Out-of-line cold path for __lck_require_preemption_disabled();
 * __abortlike keeps the panic call out of the inlined fast path.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
151 
/*
 * Assert that the caller runs with preemption disabled before touching a
 * no-preempt lock; panics (out of line) when the context is preemptible.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
159 
160 #pragma mark - HW Spin policies
161 
/*
 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
 *
 * Snapshot the policy's timeout (scaled by its shift) together with the
 * PPL/interruptibility context the spin starts in.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	/*
	 * When spinning with interrupts masked under the panic-mode hygiene
	 * checker, clamp the spin timeout to the interrupt-masked timeout
	 * (presumably so the more informative spinlock panic fires first).
	 */
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		/* interrupt_masked_timeout is in ns; convert to TSC ticks */
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
200 
/*
 * Whether this spin started inside the PPL.  Uses the value cached in
 * the timeout snapshot when SCHED_HYGIENE_DEBUG captured one, otherwise
 * queries pmap directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
212 
/*
 * Decide whether a caller spinning on `lock` should keep going.
 *
 * The first call past the (lazily armed) deadline records the spin
 * window; subsequent expiries invoke the policy's timeout handler,
 * which either panics, asks the caller to give up, or extends the
 * deadline to continue spinning.
 */
bool
hw_spin_should_keep_spinning(
	void                   *lock,
	hw_spin_policy_t        pol,
	hw_spin_timeout_t       to,
	hw_spin_state_t        *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	if (__improbable(to.hwst_timeout == 0)) {
		/* a zero timeout means spin forever */
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		/* first evaluation: arm the timeout window */
		state->hwss_start     = now;
		state->hwss_deadline  = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now     = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
267 
/*
 * Remember the lock owner word observed when contention began in the
 * per-CPU timeout-info slot (DEBUG/DEVELOPMENT only).  The low 3 bits
 * are stripped; they may carry lock-implementation flags.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
278 
/*
 * Same as lck_spinlock_timeout_set_orig_owner(), but for locks that store
 * a compact thread-ID: translate it to a thread pointer before recording
 * (DEBUG/DEVELOPMENT only).
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}
290 
/*
 * Record details about a spinlock timeout in this CPU's timeout-info
 * slot (consumed by the panic log / debugger) and try to identify the
 * CPU currently running the owning thread.
 *
 * `owner` is the raw owner word from the lock: values below the ctid
 * range are compact thread-IDs and are translated back to a thread
 * pointer.  Returns the filled-in per-CPU record.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	/* publish before scanning, so a panic on another CPU sees the record */
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	/* ml_early_cpu_max_number() is inclusive, hence <= */
	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
331 
332 #pragma mark - HW locks
333 
/*
 * Routine:	hw_lock_init
 *
 *	Initialize a hardware lock.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	/* an all-zero state word means "unlocked, no owner" */
	ordered_store_hw(lock, 0);
}
344 
/*
 * Single acquire attempt on a contended hw_lock_t: store `newval`
 * (the owning thread's state word) if the lock is free.
 *
 * On LL/SC architectures a failed attempt executes wait_for_event()
 * with the monitor armed, so the caller's retry loop sleeps until the
 * lock word changes instead of burning cycles.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
369 
/*
 * Try to set `bit` in `*target` with acquire ordering; returns true when
 * the bit was previously clear (i.e. the caller now owns it).
 *
 * When the bit is held: with `wait` set, arms the event monitor (or
 * pauses) so the caller's retry loop can wait efficiently; otherwise the
 * monitor is released and the attempt fails immediately.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
		                os_atomic_clear_exclusive();
			}
#else
		        if (wait) {
		                cpu_pause();
			}
#endif
		        return false;
		}
	});
	return true;
#else
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* bit looked clear: race for it; success iff it was still clear */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
409 
/*
 * Timeout handler for hw_lock_t spins: collects owner diagnostics and
 * panics, unless spinlock_timeout_panic was cleared (boot-arg), in which
 * case the caller keeps spinning.
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock  = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul; /* strip impl flag bits */
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
448 
/* Default hw_lock_t policy: panic after lock_panic_timeout expires. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name              = "hw_lock_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_spin_timeout_panic,
};
454 
/* Timeout handler that makes the spin give up (return) instead of panicking. */
static hw_spin_timeout_status_t
hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
#pragma unused(_lock, to, st)
	return HW_LOCK_TIMEOUT_RETURN;
}
461 
462 const struct hw_spin_policy hw_lock_spin_panic_policy = {
463 	.hwsp_name              = "hw_lock_t[panic]",
464 #if defined(__x86_64__)
465 	.hwsp_timeout           = &LockTimeOutTSC,
466 #else
467 	.hwsp_timeout_atomic    = &LockTimeOut,
468 #endif
469 	.hwsp_timeout_shift     = 2,
470 	.hwsp_op_timeout        = hw_spin_always_return,
471 };
472 
#if DEBUG || DEVELOPMENT
/*
 * Timeout backing the testing give-up policy; set at startup to 100us
 * (converted to TSC ticks on Intel).
 */
static machine_timeout_t hw_lock_test_to;

/*
 * Policy used by lock tests: give up (return) after hw_lock_test_to.
 *
 * Fixed: the policy previously pointed at LockTimeOut/LockTimeOutTSC,
 * which left hw_lock_test_to initialized below but never referenced and
 * made test spins wait 4x the production timeout instead of 100us.
 */
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name              = "testing policy",
	.hwsp_timeout_atomic    = &hw_lock_test_to,
	.hwsp_op_timeout        = hw_spin_always_return,
};

__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	/* spin timeouts are expressed in TSC ticks on Intel */
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
500 
/*
 * Timeout handler for bit-lock spins: panics with the current lock word,
 * unless spinlock_timeout_panic was cleared (boot-arg).
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
516 
/* Default bit-lock policy: panic after lock_panic_timeout expires. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
522 
#if __arm64__
/* Bit-lock policy with a fixed timebase timeout (0x3000000 ticks, ~2s at 24MHz). */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout           = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
#endif
531 
/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t               lock,
	uintptr_t               data,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

	/* catch self-deadlock: the lock word already names this thread */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	/* spin in short snoop bursts, re-checking the timeout between bursts */
	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
593 
/*
 * Timeout handler for hw_wait_while_equals32(): panic with the current
 * word value, unless spinlock_timeout_panic was cleared (boot-arg).
 */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
609 
/* Policy for hw_wait_while_equals32(): panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name              = "hw_wait_while_equals32",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals32_panic,
};
615 
/*
 * Timeout handler for hw_wait_while_equals64(): panic with the current
 * word value, unless spinlock_timeout_panic was cleared (boot-arg).
 */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
631 
/* Policy for hw_wait_while_equals64(): panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name              = "hw_wait_while_equals64",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals64_panic,
};
637 
638 uint32_t
hw_wait_while_equals32(uint32_t * address,uint32_t current)639 hw_wait_while_equals32(uint32_t *address, uint32_t current)
640 {
641 	hw_spin_policy_t  pol   = &hw_wait_while_equals32_policy;
642 	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
643 	hw_spin_state_t   state = { };
644 	uint32_t          v;
645 
646 	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
647 		hw_spin_should_keep_spinning(address, pol, to, &state);
648 	}
649 
650 	return v;
651 }
652 
653 uint64_t
hw_wait_while_equals64(uint64_t * address,uint64_t current)654 hw_wait_while_equals64(uint64_t *address, uint64_t current)
655 {
656 	hw_spin_policy_t  pol   = &hw_wait_while_equals64_policy;
657 	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
658 	hw_spin_state_t   state = { };
659 	uint64_t          v;
660 
661 	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
662 		hw_spin_should_keep_spinning(address, pol, to, &state);
663 	}
664 
665 	return v;
666 }
667 
668 __result_use_check
669 static inline hw_lock_status_t
hw_lock_to_internal(hw_lock_t lock,thread_t thread,hw_spin_policy_t pol LCK_GRP_ARG (lck_grp_t * grp))670 hw_lock_to_internal(
671 	hw_lock_t               lock,
672 	thread_t                thread,
673 	hw_spin_policy_t        pol
674 	LCK_GRP_ARG(lck_grp_t *grp))
675 {
676 	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);
677 
678 	if (__probable(hw_lock_trylock_contended(lock, state))) {
679 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
680 		return HW_LOCK_ACQUIRED;
681 	}
682 
683 	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
684 }
685 
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* preemption stays disabled until the matching hw_lock_unlock() */
	lock_disable_preemption_for_thread(thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
700 
/*
 *	Routine: hw_lock_lock_nopreempt
 *
 *	Acquire lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
714 
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	/* nonzero (HW_LOCK_ACQUIRED) on success */
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
730 
/*
 *	Routine: hw_lock_to_nopreempt
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, called and return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	/* nonzero (HW_LOCK_ACQUIRED) on success */
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
746 
747 __result_use_check
748 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))749 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
750 {
751 	if (__probable(lock_cmpxchg(&lock->lock_data, 0,
752 	    HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
753 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
754 		return true;
755 	}
756 	return false;
757 }
758 
759 /*
760  *	Routine: hw_lock_try
761  *
762  *	returns with preemption disabled on success.
763  */
764 unsigned
765 int
766 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
767 {
768 	thread_t thread = current_thread();
769 	lock_disable_preemption_for_thread(thread);
770 	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
771 	if (!success) {
772 		lock_enable_preemption();
773 	}
774 	return success;
775 }
776 
/*
 * One-shot acquire attempt for callers that already hold preemption
 * disabled (asserted); does not change the preemption level.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
785 
#if DEBUG || DEVELOPMENT
/*
 * Panic path for hw_lock_unlock_internal(): reached when a thread
 * releases a lock whose state word does not name it as the owner.
 *
 * Fixed copy-paste defect: the message previously claimed a recursive
 * *lock* attempt (the hw_lock_lock_contended() panic text), which was
 * wrong for this unlock-of-unowned-lock path.
 */
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock %p it doesn't own",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */
795 
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	/* sanity check: only the owning thread may release the lock */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	/* release-ordered store publishes all writes made under the lock */
	os_atomic_store(&lock->lock_data, 0, release);
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
816 
/* Release the lock and drop the preemption disable taken at acquire. */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}
823 
/* Release the lock without touching the preemption level. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
829 
830 void
hw_lock_assert(__assert_only hw_lock_t lock,__assert_only unsigned int type)831 hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
832 {
833 #if MACH_ASSERT
834 	thread_t thread, holder;
835 
836 	holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
837 	thread = current_thread();
838 
839 	if (type == LCK_ASSERT_OWNED) {
840 		if (holder == 0) {
841 			panic("Lock not owned %p = %p", lock, holder);
842 		}
843 		if (holder != thread) {
844 			panic("Lock not owned by current thread %p = %p", lock, holder);
845 		}
846 	} else if (type == LCK_ASSERT_NOTOWNED) {
847 		if (holder != THREAD_NULL && holder == thread) {
848 			panic("Lock owned by current thread %p = %p", lock, holder);
849 		}
850 	} else {
851 		panic("hw_lock_assert(): invalid arg (%u)", type);
852 	}
853 #endif /* MACH_ASSERT */
854 }
855 
/*
 *	Routine hw_lock_held, doesn't change preemption state.
 *	N.B.  Racy, of course.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	/* nonzero state word == some thread owns the lock */
	return ordered_load_hw(lock) != 0;
}
865 
/*
 * Slow path for bit locks: spin in snoop bursts until the bit is won or
 * the policy's timeout handler stops the spin; records contention
 * statistics when enabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t          *lock,
	uint32_t                bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/* NOTE(review): relies on bool false/true converting to
			 * HW_LOCK_CONTENDED/HW_LOCK_ACQUIRED — confirm enum values */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
909 
/*
 * Common acquire path for bit locks: one fast try, then the contended
 * spin loop governed by `pol`.  Returns HW_LOCK_ACQUIRED (nonzero) on
 * success; the contended path's status otherwise.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t          *lock,
	unsigned int            bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
925 
/*
 *	Routine: hw_lock_bit_to
 *
 *	Acquire bit lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t          * lock,
	uint32_t                bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}
944 
/*
 *	Routine: hw_lock_bit
 *
 *	Acquire bit lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	/* default policy panics on timeout, so the result can be ignored */
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
957 
/*
 *	Routine: hw_lock_bit_nopreempt
 *
 *	Acquire bit lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (asserted).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
969 
970 
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 	boolean_t success = false;
976 
977 	_disable_preemption();
978 	success = hw_lock_trylock_bit(lock, bit, false);
979 	if (!success) {
980 		lock_enable_preemption();
981 	}
982 
983 	if (success) {
984 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 	}
986 
987 	return success;
988 }
989 
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	/* Clear the lock bit with release ordering so all stores made in
	 * the critical section are visible before the lock reads as free. */
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
998 
999 /*
1000  *	Routine:	hw_unlock_bit
1001  *
1002  *		Release spin-lock. The second parameter is the bit number to test and set.
1003  *		Decrement the preemption level.
1004  */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	/* Pair with the preemption disable taken when the lock was acquired. */
	lock_enable_preemption();
}
1011 
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	/* Counterpart of hw_lock_bit_nopreempt: the preemption level is
	 * owned by the caller and left untouched. */
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
1018 
1019 
1020 #pragma mark - lck_*_sleep
1021 
1022 /*
1023  * Routine:	lck_spin_sleep
1024  */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	lck_grp_t               *grp)
{
	wait_result_t   res;

	/* Reject flag bits outside the documented sleep-action mask. */
	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		/* Drop the spin lock only after the wait is asserted, so a
		 * concurrent wakeup cannot be missed. */
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		/* Re-take the lock unless the caller asked to return unlocked. */
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}
1052 
wait_result_t
lck_spin_sleep(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	/* Same as lck_spin_sleep_grp, without lock-group accounting. */
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1062 
1063 /*
1064  * Routine:	lck_spin_sleep_deadline
1065  */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	wait_result_t   res;

	/* Reject flag bits outside the documented sleep-action mask. */
	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		/* Drop the spin lock before blocking; wakeup or deadline
		 * expiry resumes us. */
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock(lck);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}
1093 
1094 /*
1095  * Routine:	lck_mtx_sleep
1096  */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	thread_pri_floor_t      token;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	/* Reject flag bits outside the documented sleep-action mask. */
	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-take the mutex in the mode the caller requested. */
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* token was initialized above iff PROMOTED_PRI is set. */
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1149 
1150 
1151 /*
1152  * Routine:	lck_mtx_sleep_deadline
1153  */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 	lck_mtx_t               *lck,
1157 	lck_sleep_action_t      lck_sleep_action,
1158 	event_t                 event,
1159 	wait_interrupt_t        interruptible,
1160 	uint64_t                deadline)
1161 {
1162 	wait_result_t           res;
1163 	thread_pri_floor_t      token;
1164 
1165 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167 
1168 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 		panic("Invalid lock sleep action %x", lck_sleep_action);
1170 	}
1171 
1172 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 		/*
1174 		 * See lck_mtx_sleep().
1175 		 */
1176 		token = thread_priority_floor_start();
1177 	}
1178 
1179 	res = assert_wait_deadline(event, interruptible, deadline);
1180 	if (res == THREAD_WAITING) {
1181 		lck_mtx_unlock(lck);
1182 		res = thread_block(THREAD_CONTINUE_NULL);
1183 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 				lck_mtx_lock_spin(lck);
1186 			} else {
1187 				lck_mtx_lock(lck);
1188 			}
1189 		}
1190 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 		lck_mtx_unlock(lck);
1192 	}
1193 
1194 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 		thread_priority_floor_end(&token);
1196 	}
1197 
1198 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199 
1200 	return res;
1201 }
1202 
1203 /*
1204  * sleep_with_inheritor and wakeup_with_inheritor KPI
1205  *
1206  * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207  * the latest thread specified as inheritor.
1208  *
 * Inheritor management is delegated to the caller: the caller stores a thread identifier and passes it to these functions to specify upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. It is therefore the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1212  *
 * sleep_with_inheritor requires a locking primitive to be held while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214  *
 * Turnstile requires a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1219  *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
1221  * spinlock as the turnstile interlock. Because we do not want to hold interrupt disabled while holding the bucket interlock a new turnstile kernel hash table
1222  * is instantiated for this KPI to manage the hash without interrupt disabled.
1223  * Also:
1224  * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225  * - every event will have a dedicated wait_queue.
1226  *
1227  * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228  * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229  */
1230 
1231 
typedef enum {
	LCK_WAKEUP_THREAD,      /* wake one specific thread, supplied by the caller */
	LCK_WAKEUP_ONE,         /* wake at most one waiter */
	LCK_WAKEUP_ALL          /* wake every waiter */
} lck_wakeup_type_t;
1237 
/*
 * Common engine for the wakeup_with_inheritor KPI.
 *
 * Wakes waiter(s) on event according to wake_type, using the turnstile
 * stored in the turnstile kernel hash table, and updates/clears the
 * turnstile inheritor as appropriate.
 *
 * Returns KERN_SUCCESS if at least one thread was woken,
 * KERN_NOT_WAITING otherwise. If thread_wokenup is non-NULL for
 * LCK_WAKEUP_ONE, it receives the identity of the woken thread
 * (or NULL); for LCK_WAKEUP_THREAD it supplies the thread to wake.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t                 event,
	wait_result_t           result,
	lck_wakeup_type_t       wake_type,
	lck_wake_action_t       action,
	thread_t               *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	switch (wake_type) {
	case LCK_WAKEUP_ONE: {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		if (thread_wokenup) {
			thread_t wokeup;

			/* Caller wants the identity of the woken thread. */
			wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
			*thread_wokenup = wokeup;
			ret = wokeup ? KERN_SUCCESS : KERN_NOT_WAITING;
		} else {
			ret = waitq_wakeup64_one(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
		}
		if (ret == KERN_SUCCESS && action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
			/* Inheritor left untouched: skip the chain update. */
			goto complete;
		}
		if (ret == KERN_NOT_WAITING) {
			/* Nobody was waiting: clear the inheritor. */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		break;
	}
	case LCK_WAKEUP_ALL: {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
		break;
	}
	case LCK_WAKEUP_THREAD: {
		assert(thread_wokenup);
		ret = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(event),
		    *thread_wokenup, result);
		break;
	}
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor is either NULL, or a thread that has just been
	 * woken up and has not blocked, because it is racing with the same interlock used
	 * here after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However unless the current thread is the old inheritor,
	 * old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We can anyway optimize this by asking turnstile to tell us
	 * if old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1337 
/*
 * Core of the sleep_with_inheritor KPI: sleep on event while pushing
 * on inheritor, using a turnstile from the turnstile kernel hash table.
 *
 * primitive_unlock is invoked only after the hash bucket spinlock (the
 * turnstile interlock) is held, so a concurrent wakeup cannot slip in
 * between releasing the caller's primitive and asserting the wait;
 * primitive_lock re-takes the primitive before returning (callers pass
 * a no-op block when LCK_SLEEP_UNLOCK semantics are requested).
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline,
	void                  (^primitive_lock)(void),
	void                  (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* Re-take the caller's primitive before returning. */
	primitive_lock();

	return ret;
}
1394 
1395 /*
1396  * change_sleep_inheritor is independent from the locking primitive.
1397  */
1398 
1399 /*
1400  * Name: change_sleep_inheritor
1401  *
1402  * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1403  *
1404  * Args:
1405  *   Arg1: event to redirect the push.
1406  *   Arg2: new inheritor for event.
1407  *
1408  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1409  *
1410  * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1411  *             wakeup for the event is called.
1412  *             NOTE: this cannot be called from interrupt context.
1413  */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret =  KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	/* Report KERN_NOT_WAITING, but still update the inheritor below. */
	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1455 
1456 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1457 lck_spin_sleep_with_inheritor(
1458 	lck_spin_t *lock,
1459 	lck_sleep_action_t lck_sleep_action,
1460 	event_t event,
1461 	thread_t inheritor,
1462 	wait_interrupt_t interruptible,
1463 	uint64_t deadline)
1464 {
1465 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1466 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1467 		           interruptible, deadline,
1468 		           ^{}, ^{ lck_spin_unlock(lock); });
1469 	} else {
1470 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1471 		           interruptible, deadline,
1472 		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1473 	}
1474 }
1475 
/*
 * Sleep on event pushing on inheritor, with an hw_lck_ticket_t as the
 * associated primitive. The lock is released while sleeping and, unless
 * LCK_SLEEP_UNLOCK is set, re-acquired before returning.
 */
wait_result_t
hw_lck_ticket_sleep_with_inheritor(
	hw_lck_ticket_t *lock,
	lck_grp_t *grp __unused,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	/* NOTE(review): grp is tagged __unused yet referenced by the relock
	 * block below; presumably the tag covers configurations where that
	 * reference compiles out — confirm against LCK_GRP settings. */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{}, ^{ hw_lck_ticket_unlock(lock); });
	} else {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
	}
}
1496 
1497 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1498 lck_ticket_sleep_with_inheritor(
1499 	lck_ticket_t *lock,
1500 	lck_grp_t *grp,
1501 	lck_sleep_action_t lck_sleep_action,
1502 	event_t event,
1503 	thread_t inheritor,
1504 	wait_interrupt_t interruptible,
1505 	uint64_t deadline)
1506 {
1507 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1508 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1509 		           interruptible, deadline,
1510 		           ^{}, ^{ lck_ticket_unlock(lock); });
1511 	} else {
1512 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1513 		           interruptible, deadline,
1514 		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1515 	}
1516 }
1517 
/*
 * Sleep on event pushing on inheritor, with a lck_mtx_t as the
 * associated primitive. The sleep action selects how the mutex is
 * re-taken on wakeup (regular, spin, spin-always) or whether it is
 * left unlocked (LCK_SLEEP_UNLOCK).
 */
wait_result_t
lck_mtx_sleep_with_inheritor(
	lck_mtx_t              *lock,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	/* The mutex must be held by the caller: it is handed off to the
	 * turnstile interlock and dropped inside the sleep engine. */
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{;},
		           ^{lck_mtx_unlock(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock_spin(lock);},
		           ^{lck_mtx_unlock(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock_spin_always(lock);},
		           ^{lck_mtx_unlock(lock);});
	} else {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock(lock);},
		           ^{lck_mtx_unlock(lock);});
	}
}
1559 
1560 /*
1561  * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1562  */
1563 
/*
 * Sleep on event pushing on inheritor, with a lck_rw_t as the
 * associated primitive. The sleep action selects the mode in which the
 * rw lock is re-taken on wakeup (shared, exclusive, or the mode it was
 * previously held in), or whether it is left unlocked.
 */
wait_result_t
lck_rw_sleep_with_inheritor(
	lck_rw_t               *lock,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	/* __block: the unlock block records the mode lck_rw_done() reports
	 * so the relock block can restore the same mode afterwards. */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{;},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* No explicit mode requested: re-take in the prior mode. */
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock(lock, lck_rw_type);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock_exclusive(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock_shared(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	}
}
1607 
1608 /*
1609  * wakeup_with_inheritor functions are independent from the locking primitive.
1610  */
1611 
kern_return_t
wakeup_thread_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t thread_towake)
{
	/* Wake the specific thread thread_towake waiting on event. */
	return wakeup_with_inheritor_and_turnstile(event,
	           result,
	           LCK_WAKEUP_THREAD,
	           action,
	           &thread_towake);
}
1621 
kern_return_t
wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
{
	/* Wake at most one waiter; if thread_wokenup is non-NULL it
	 * receives the identity of the thread that was woken (or NULL). */
	return wakeup_with_inheritor_and_turnstile(event,
	           result,
	           LCK_WAKEUP_ONE,
	           action,
	           thread_wokenup);
}
1631 
1632 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1633 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1634 {
1635 	return wakeup_with_inheritor_and_turnstile(event,
1636 	           result,
1637 	           LCK_WAKEUP_ALL,
1638 	           0,
1639 	           NULL);
1640 }
1641 
/*
 * Debugger (kdp) callback: report the inheritor of a
 * sleep_with_inheritor wait as the "owner" in thread_waitinfo.
 */
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	waitinfo->owner = 0;
	waitinfo->context = 0;

	/* If the waitq is held, turnstile state is in flux: report no
	 * owner rather than reading it mid-update (kdp cannot wait). */
	if (waitq_held(waitq)) {
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1658 
/* Compile-time layout checks for the cond_sleep_with_inheritor KPI:
 * the condition-word owner field is exactly one ctid wide, and the
 * 32/64-bit condition structs overlay plain integers exactly. */
static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1662 
/*
 * Common engine for cond_sleep_with_inheritor{32,64}[_mask]:
 * evaluate cond_sleep_check under the turnstile interlock; if it
 * passes, sleep on cond pushing on the thread named by the ctid the
 * check reports, otherwise return THREAD_NOT_WAITING immediately.
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1729 
1730 /*
1731  * Name: cond_sleep_with_inheritor32_mask
1732  *
1733  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1734  *              Allows a thread to conditionally sleep while indicating which thread should
1735  *              inherit the priority push associated with the condition.
1736  *              The condition should be expressed through a cond_swi_var32_s pointer.
1737  *              The condition needs to be populated by the caller with the ctid of the
1738  *              thread that should inherit the push. The remaining bits of the condition
1739  *              can be used by the caller to implement its own synchronization logic.
1740  *              A copy of the condition value observed by the caller when it decided to call
1741  *              this function should be provided to prevent races with matching wakeups.
1742  *              This function will atomically check the value stored in the condition against
1743  *              the expected/observed one provided only for the bits that are set in the mask.
1744  *              If the check doesn't pass the thread will not sleep and the function will return.
1745  *              The ctid provided in the condition will be used only after a successful
1746  *              check.
1747  *
1748  * Args:
1749  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1750  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1751  *   Arg3: mask to apply to the condition to check.
1752  *   Arg4: interruptible flag for wait.
1753  *   Arg5: deadline for wait.
1754  *
1755  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1756  *             wakeup for the cond is called.
1757  *
1758  * Returns: result of the wait.
1759  */
static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* Sleep check: atomically observe the 32-bit condition word and
	 * compare only the masked bits against the caller-observed value.
	 * On a match, hand back the owner ctid embedded in the word. */
	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
		cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
		bool ret;
		if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
			ret = true;
			*ctid = cond_val.cond32_owner;
		} else {
			ret = false;
		}
		return ret;
	};

	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
}
1777 
1778 /*
1779  * Name: cond_sleep_with_inheritor64_mask
1780  *
1781  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1782  *              Allows a thread to conditionally sleep while indicating which thread should
1783  *              inherit the priority push associated with the condition.
1784  *              The condition should be expressed through a cond_swi_var64_s pointer.
1785  *              The condition needs to be populated by the caller with the ctid of the
1786  *              thread that should inherit the push. The remaining bits of the condition
1787  *              can be used by the caller to implement its own synchronization logic.
1788  *              A copy of the condition value observed by the caller when it decided to call
1789  *              this function should be provided to prevent races with matching wakeups.
1790  *              This function will atomically check the value stored in the condition against
1791  *              the expected/observed one provided only for the bits that are set in the mask.
1792  *              If the check doesn't pass the thread will not sleep and the function will return.
1793  *              The ctid provided in the condition will be used only after a successful
1794  *              check.
1795  *
1796  * Args:
1797  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1798  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1799  *   Arg3: mask to apply to the condition to check.
1800  *   Arg4: interruptible flag for wait.
1801  *   Arg5: deadline for wait.
1802  *
1803  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1804  *             wakeup for the cond is called.
1805  *
1806  * Returns: result of the wait.
1807  */
wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * Block executed (atomically with respect to matching wakeups) by
	 * cond_sleep_with_inheritor_and_turnstile_type() to decide whether to sleep.
	 * Returns true if the masked condition still matches the caller's observed
	 * value, and in that case outputs the ctid of the inheritor thread.
	 * NOTE(review): the block is declared as taking uint32_t* but defined with
	 * ctid_t* — presumably ctid_t is a 32-bit id; confirm against the header.
	 */
	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
		/* Re-read the live condition value; relaxed is enough since the
		 * caller's turnstile machinery provides the needed ordering. */
		cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
		bool ret;
		if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
			ret = true;
			/* Condition unchanged: report who should inherit the push. */
			*ctid = cond_val.cond64_owner;
		} else {
			ret = false;
		}
		return ret;
	};

	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
}
1825 
1826 /*
1827  * Name: cond_sleep_with_inheritor32
1828  *
1829  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1830  *              Allows a thread to conditionally sleep while indicating which thread should
1831  *              inherit the priority push associated with the condition.
1832  *              The condition should be expressed through a cond_swi_var32_s pointer.
1833  *              The condition needs to be populated by the caller with the ctid of the
1834  *              thread that should inherit the push. The remaining bits of the condition
1835  *              can be used by the caller to implement its own synchronization logic.
1836  *              A copy of the condition value observed by the caller when it decided to call
1837  *              this function should be provided to prevent races with matching wakeups.
1838  *              This function will atomically check the value stored in the condition against
1839  *              the expected/observed one provided. If the check doesn't pass the thread will not
1840  *              sleep and the function will return.
1841  *              The ctid provided in the condition will be used only after a successful
1842  *              check.
1843  *
1844  * Args:
1845  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1846  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1847  *   Arg3: interruptible flag for wait.
1848  *   Arg4: deadline for wait.
1849  *
1850  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1851  *             wakeup for the cond is called.
1852  *
1853  * Returns: result of the wait.
1854  */
1855 wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond,cond_swi_var32_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1856 cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1857 {
1858 	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
1859 }
1860 
1861 /*
1862  * Name: cond_sleep_with_inheritor64
1863  *
1864  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1865  *              Allows a thread to conditionally sleep while indicating which thread should
1866  *              inherit the priority push associated with the condition.
1867  *              The condition should be expressed through a cond_swi_var64_s pointer.
1868  *              The condition needs to be populated by the caller with the ctid of the
1869  *              thread that should inherit the push. The remaining bits of the condition
1870  *              can be used by the caller to implement its own synchronization logic.
1871  *              A copy of the condition value observed by the caller when it decided to call
1872  *              this function should be provided to prevent races with matching wakeups.
1873  *              This function will atomically check the value stored in the condition against
1874  *              the expected/observed one provided. If the check doesn't pass the thread will not
1875  *              sleep and the function will return.
1876  *              The ctid provided in the condition will be used only after a successful
1877  *              check.
1878  *
1879  * Args:
1880  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1881  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1882  *   Arg3: interruptible flag for wait.
1883  *   Arg4: deadline for wait.
1884  *
1885  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1886  *             wakeup for the cond is called.
1887  *
1888  * Returns: result of the wait.
1889  */
1890 wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond,cond_swi_var64_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1891 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1892 {
1893 	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1894 }
1895 
1896 /*
1897  * Name: cond_wakeup_one_with_inheritor
1898  *
1899  * Description: Wake up one waiter waiting on the condition (if any).
1900  *              The thread woken up will be the one with the higher sched priority waiting on the condition.
1901  *              The push for the condition will be transferred from the last inheritor to the woken up thread.
1902  *
1903  * Args:
1904  *   Arg1: condition to wake from.
1905  *   Arg2: wait result to pass to the woken up thread.
 *   Arg3: pointer for storing the woken-up thread.
1907  *
1908  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1909  *
 * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
 *             condition or a wakeup for the event is called.
 *             A reference for the woken-up thread is acquired.
1913  *             NOTE: this cannot be called from interrupt context.
1914  */
1915 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1916 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1917 {
1918 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1919 	           result,
1920 	           LCK_WAKEUP_ONE,
1921 	           action,
1922 	           thread_wokenup);
1923 }
1924 
1925 /*
1926  * Name: cond_wakeup_all_with_inheritor
1927  *
1928  * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1929  *
1930  * Args:
1931  *   Arg1: condition to wake from.
1932  *   Arg2: wait result to pass to the woken up threads.
1933  *
1934  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1935  *
1936  * Conditions: NOTE: this cannot be called from interrupt context.
1937  */
1938 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1939 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1940 {
1941 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1942 	           result,
1943 	           LCK_WAKEUP_ALL,
1944 	           0,
1945 	           NULL);
1946 }
1947 
1948 
#pragma mark - gates

/* Tag value stored in gt_type to identify an initialized gate. */
#define GATE_TYPE        3
/* Bit positions of the flag bits kept in the low bits of gt_data. */
#define GATE_ILOCK_BIT   0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Spin on / release the interlock bit embedded in gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/*
 * gt_data packs the holder thread_t pointer with the two flag bits above;
 * these macros convert between the packed state and the thread pointer.
 */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Poison value stored in gt_data once the gate has been destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate)     ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Type of the deferred turnstile-cleanup function some helpers return. */
typedef void (*void_func_void)(void);
1975 
__abortlike
static void
/* Panic: the gt_type tag does not identify this object as a gate. */
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1982 
__abortlike
static void
/* Panic: the gate carries the GATE_DESTROYED poison value. */
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1989 
1990 static void
gate_verify(gate_t * gate)1991 gate_verify(gate_t *gate)
1992 {
1993 	if (gate->gt_type != GATE_TYPE) {
1994 		gate_verify_tag_panic(gate);
1995 	}
1996 	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
1997 		gate_verify_destroy_panic(gate);
1998 	}
1999 
2000 	assert(gate->gt_refs > 0);
2001 }
2002 
__abortlike
static void
/* Panic: a thread tried to close a gate that is already closed. */
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
2009 
2010 static kern_return_t
gate_try_close(gate_t * gate)2011 gate_try_close(gate_t *gate)
2012 {
2013 	uintptr_t state;
2014 	thread_t holder;
2015 	kern_return_t ret;
2016 	thread_t thread = current_thread();
2017 
2018 	gate_verify(gate);
2019 
2020 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2021 		return KERN_SUCCESS;
2022 	}
2023 
2024 	gate_ilock(gate);
2025 	state = ordered_load_gate(gate);
2026 	holder = GATE_STATE_TO_THREAD(state);
2027 
2028 	if (holder == NULL) {
2029 		assert(gate_has_waiter_bit(state) == FALSE);
2030 
2031 		state = GATE_THREAD_TO_STATE(current_thread());
2032 		state |= GATE_ILOCK;
2033 		ordered_store_gate(gate, state);
2034 		ret = KERN_SUCCESS;
2035 	} else {
2036 		if (holder == current_thread()) {
2037 			gate_already_owned_panic(gate, holder);
2038 		}
2039 		ret = KERN_FAILURE;
2040 	}
2041 
2042 	gate_iunlock(gate);
2043 	return ret;
2044 }
2045 
/*
 * Close the gate, making the current thread the holder.
 * Panics if the gate is already closed (by anyone, including self).
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: gate fully open, claim it with a single CAS. */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	/* Slow path: take the interlock and re-examine the state. */
	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* No holder implies no waiters can be parked on the gate. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Install ourselves as holder, keeping the interlock bit set
	 * until gate_iunlock() clears it. */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2075 
/*
 * Wake all waiters parked on the gate's turnstile as part of an open.
 * Called with the gate interlock held by the current holder.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/* Wake every waiter; WAITQ_UPDATE_INHERITOR clears the inheritor. */
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2096 
__abortlike
static void
/* Panic: open/handoff attempted by a thread that is not the holder. */
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2107 
/*
 * Open the gate, waking all waiters (if any). Must be called by the
 * current holder; panics otherwise.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: we hold the gate and nobody is waiting — release it
	 * with a single CAS (release pairs with the acquire in close). */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiter bits; keep only the interlock bit,
	 * which gate_iunlock() releases. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2139 
/*
 * Turnstile half of a handoff: wake the highest-priority waiter and make
 * it the prospective new holder. Called with the gate interlock held.
 *
 * Out: thread_woken_up — the new holder (with a reference the caller
 *      must eventually drop); waiters — whether more waiters remain.
 * Returns KERN_SUCCESS if a thread was woken, KERN_NOT_WAITING otherwise.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2203 
/*
 * Hand the gate off from the current holder to the highest-priority
 * waiter. With GATE_HANDOFF_OPEN_IF_NO_WAITERS the gate is opened when
 * no waiter exists; with GATE_HANDOFF_DEFAULT the caller keeps it.
 *
 * Returns KERN_SUCCESS on a successful handoff, KERN_NOT_WAITING when
 * there was no waiter. Panics if the caller is not the holder.
 */
static kern_return_t
gate_handoff(gate_t *gate,
    int flags)
{
	kern_return_t ret;
	thread_t new_holder = NULL;
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
	gate_verify(gate);

	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
		/* Fast path: no waiters and interlock free — just open. */
		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
			return KERN_NOT_WAITING;
		}
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != current_thread()) {
		gate_not_owned_panic(gate, holder, false);
	}

	if (waiters) {
		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
		if (ret == KERN_SUCCESS) {
			/* Install the woken thread as the new holder, preserving
			 * the waiter bit if more threads remain parked. */
			state = GATE_THREAD_TO_STATE(new_holder);
			if (waiters) {
				state |= GATE_WAITERS;
			}
		} else {
			/* No waiter was actually found on the waitq. */
			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
				state = 0;
			}
		}
	} else {
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			state = 0;
		}
		ret = KERN_NOT_WAITING;
	}
	/* Re-set the interlock bit; gate_iunlock() will clear it. */
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);

	/* Drop the reference waitq_wakeup64_identify() took on the woken thread. */
	if (new_holder) {
		thread_deallocate(new_holder);
	}
	return ret;
}
2262 
/*
 * Turnstile half of a steal: repoint the inheritor at the stealing
 * thread. Called with the gate interlock held.
 *
 * Returns turnstile_cleanup, which the caller must invoke after
 * dropping the interlock (it may need to walk the old holder's chain).
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2281 
__abortlike
static void
/* Panic: wait/steal attempted on a gate that is not closed. */
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2292 
/*
 * Forcibly take ownership of a closed gate from its current holder,
 * making the current thread the new holder (and, if there are waiters,
 * the new turnstile inheritor). Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	/* Deferred turnstile cleanup to run after dropping the interlock. */
	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep the waiter bit and move the priority push to us. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2327 
/*
 * Turnstile half of gate_wait(): park the current thread on the gate's
 * turnstile with `holder` as inheritor, block, and on wakeup report the
 * wait result and whether waiters remain.
 *
 * Called with the gate interlock held; it is dropped across the block
 * and re-taken before returning. Returns turnstile_cleanup, to be run
 * by the caller without the interlock held.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	/* Drop the gate interlock before blocking. */
	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
		    holder == current_thread())) {     // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2381 
/* Return an allocated gate's memory to the KT_GATE zone. */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2387 
__abortlike
static void
/* Panic: gt_refs would overflow its 16-bit counter. */
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2394 
/*
 * Wait on a closed gate until it is opened, handed off, stolen, or the
 * wait is interrupted / times out.
 *
 * The primitive (lck_rw/lck_mtx) lock protecting the gate is released
 * via primitive_unlock() before sleeping and re-taken via
 * primitive_lock() before returning. A gate reference is held across
 * the sleep so a concurrent gate_free() cannot release the memory
 * under us; the last waiter frees an allocated, destroyed gate.
 *
 * Returns GATE_HANDOFF, GATE_OPENED, GATE_INTERRUPTED or GATE_TIMED_OUT.
 * Panics if the gate is not closed on entry.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	/* Back from the sleep, with the gate interlock re-held. */
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Refresh the waiter bit to reflect the remaining waiters. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken above; remember whether we may have to free. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate was destroyed while we slept: the last waiter of an
	 * allocated gate frees it; destruction of an embedded gate with
	 * waiters still around is a bug. */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2518 
2519 static void
gate_assert(gate_t * gate,int flags)2520 gate_assert(gate_t *gate, int flags)
2521 {
2522 	uintptr_t state;
2523 	thread_t holder;
2524 
2525 	gate_verify(gate);
2526 
2527 	gate_ilock(gate);
2528 	state = ordered_load_gate(gate);
2529 	holder = GATE_STATE_TO_THREAD(state);
2530 
2531 	switch (flags) {
2532 	case GATE_ASSERT_CLOSED:
2533 		assert(holder != NULL);
2534 		break;
2535 	case GATE_ASSERT_OPEN:
2536 		assert(holder == NULL);
2537 		break;
2538 	case GATE_ASSERT_HELD:
2539 		assert(holder == current_thread());
2540 		break;
2541 	default:
2542 		panic("invalid %s flag %d", __func__, flags);
2543 	}
2544 
2545 	gate_iunlock(gate);
2546 }
2547 
/* How the gate was created: embedded in another object, or heap-allocated. */
enum {
	GT_INIT_DEFAULT = 0,
	GT_INIT_ALLOC
};
2552 
2553 static void
gate_init(gate_t * gate,uint type)2554 gate_init(gate_t *gate, uint type)
2555 {
2556 	bzero(gate, sizeof(gate_t));
2557 
2558 	gate->gt_data = 0;
2559 	gate->gt_turnstile = NULL;
2560 	gate->gt_refs = 1;
2561 	switch (type) {
2562 	case GT_INIT_ALLOC:
2563 		gate->gt_alloc = 1;
2564 		break;
2565 	default:
2566 		gate->gt_alloc = 0;
2567 		break;
2568 	}
2569 	gate->gt_type = GATE_TYPE;
2570 	gate->gt_flags_pad = 0;
2571 }
2572 
2573 static gate_t*
gate_alloc_init(void)2574 gate_alloc_init(void)
2575 {
2576 	gate_t *gate;
2577 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2578 	gate_init(gate, GT_INIT_ALLOC);
2579 	return gate;
2580 }
2581 
__abortlike
static void
/* Panic: gate destroyed while still held by a thread. */
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2588 
__abortlike
static void
/* Panic: gate destroyed while threads are still waiting on it. */
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2595 
/*
 * Mark the gate destroyed and drop the init reference.
 * The gate must be open with no waiters; panics otherwise.
 * Returns the remaining reference count (non-zero means woken-up
 * waiters are still finishing their wait and will free later).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still need
	 * to be available to let the
	 * last wokenup threads to clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2636 
__abortlike
static void
/* Panic: gate_destroy() used on a heap-allocated gate (use gate_free()). */
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2643 
2644 static void
gate_destroy(gate_t * gate)2645 gate_destroy(gate_t *gate)
2646 {
2647 	gate_verify(gate);
2648 	if (gate->gt_alloc == 1) {
2649 		gate_destroy_panic(gate);
2650 	}
2651 	gate_destroy_internal(gate);
2652 }
2653 
__abortlike
static void
/* Panic: gate_free() used on an embedded gate (use gate_destroy()). */
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2660 
/*
 * Destroy and (possibly) free a gate allocated with gate_alloc_init().
 * If woken-up waiters still hold references, the last of them performs
 * the actual free from gate_wait(). Panics on an embedded gate.
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2684 
2685 /*
2686  * Name: lck_rw_gate_init
2687  *
2688  * Description: initializes a variable declared with decl_lck_rw_gate_data.
2689  *
2690  * Args:
2691  *   Arg1: lck_rw_t lock used to protect the gate.
2692  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2693  */
2694 void
lck_rw_gate_init(lck_rw_t * lock,gate_t * gate)2695 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2696 {
2697 	(void) lock;
2698 	gate_init(gate, GT_INIT_DEFAULT);
2699 }
2700 
2701 /*
2702  * Name: lck_rw_gate_alloc_init
2703  *
2704  * Description: allocates and initializes a gate_t.
2705  *
2706  * Args:
2707  *   Arg1: lck_rw_t lock used to protect the gate.
2708  *
2709  * Returns:
2710  *         gate_t allocated.
2711  */
2712 gate_t*
lck_rw_gate_alloc_init(lck_rw_t * lock)2713 lck_rw_gate_alloc_init(lck_rw_t *lock)
2714 {
2715 	(void) lock;
2716 	return gate_alloc_init();
2717 }
2718 
2719 /*
2720  * Name: lck_rw_gate_destroy
2721  *
2722  * Description: destroys a variable previously initialized
2723  *              with lck_rw_gate_init().
2724  *
2725  * Args:
2726  *   Arg1: lck_rw_t lock used to protect the gate.
2727  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2728  */
2729 void
lck_rw_gate_destroy(lck_rw_t * lock,gate_t * gate)2730 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2731 {
2732 	(void) lock;
2733 	gate_destroy(gate);
2734 }
2735 
2736 /*
2737  * Name: lck_rw_gate_free
2738  *
2739  * Description: destroys and tries to free a gate previously allocated
2740  *              with lck_rw_gate_alloc_init().
2741  *              The gate free might be delegated to the last thread returning
2742  *              from the gate_wait().
2743  *
2744  * Args:
2745  *   Arg1: lck_rw_t lock used to protect the gate.
2746  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2747  */
2748 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2749 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2750 {
2751 	(void) lock;
2752 	gate_free(gate);
2753 }
2754 
2755 /*
2756  * Name: lck_rw_gate_try_close
2757  *
2758  * Description: Tries to close the gate.
2759  *              In case of success the current thread will be set as
2760  *              the holder of the gate.
2761  *
2762  * Args:
2763  *   Arg1: lck_rw_t lock used to protect the gate.
2764  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2765  *
2766  * Conditions: Lock must be held. Returns with the lock held.
2767  *
2768  * Returns:
2769  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2770  *          of the gate.
2771  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2772  *          to wake up possible waiters on the gate before returning to userspace.
2773  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2774  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2775  *
2776  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2777  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2778  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2779  *          be done without dropping the lock that is protecting the gate in between.
2780  */
2781 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2782 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2783 {
2784 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2785 
2786 	return gate_try_close(gate);
2787 }
2788 
2789 /*
2790  * Name: lck_rw_gate_close
2791  *
2792  * Description: Closes the gate. The current thread will be set as
2793  *              the holder of the gate. Will panic if the gate is already closed.
2794  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2795  *              to wake up possible waiters on the gate before returning to userspace.
2796  *
2797  * Args:
2798  *   Arg1: lck_rw_t lock used to protect the gate.
2799  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2800  *
2801  * Conditions: Lock must be held. Returns with the lock held.
2802  *             The gate must be open.
2803  *
2804  */
2805 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2806 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2807 {
2808 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2809 
2810 	return gate_close(gate);
2811 }
2812 
2813 /*
2814  * Name: lck_rw_gate_open
2815  *
2816  * Description: Opens the gate and wakes up possible waiters.
2817  *
2818  * Args:
2819  *   Arg1: lck_rw_t lock used to protect the gate.
2820  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2821  *
2822  * Conditions: Lock must be held. Returns with the lock held.
2823  *             The current thread must be the holder of the gate.
2824  *
2825  */
2826 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2827 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2828 {
2829 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2830 
2831 	gate_open(gate);
2832 }
2833 
2834 /*
2835  * Name: lck_rw_gate_handoff
2836  *
2837  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2838  *              priority will be selected as the new holder of the gate, and woken up,
2839  *              with the gate remaining in the closed state throughout.
2840  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2841  *              will be returned.
2842  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2843  *              case no waiters were found.
2844  *
2845  *
2846  * Args:
2847  *   Arg1: lck_rw_t lock used to protect the gate.
2848  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2849  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2850  *
2851  * Conditions: Lock must be held. Returns with the lock held.
2852  *             The current thread must be the holder of the gate.
2853  *
2854  * Returns:
2855  *          KERN_SUCCESS in case one of the waiters became the new holder.
2856  *          KERN_NOT_WAITING in case there were no waiters.
2857  *
2858  */
2859 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2860 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2861 {
2862 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2863 
2864 	return gate_handoff(gate, flags);
2865 }
2866 
2867 /*
2868  * Name: lck_rw_gate_steal
2869  *
 * Description: Steals the ownership of the gate. It sets the current thread as the
2871  *              new holder of the gate.
2872  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2873  *              to wake up possible waiters on the gate before returning to userspace.
2874  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2875  *              anymore.
2876  *
2877  *
2878  * Args:
2879  *   Arg1: lck_rw_t lock used to protect the gate.
2880  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2881  *
2882  * Conditions: Lock must be held. Returns with the lock held.
2883  *             The gate must be closed and the current thread must not already be the holder.
2884  *
2885  */
2886 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2887 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2888 {
2889 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2890 
2891 	gate_steal(gate);
2892 }
2893 
2894 /*
2895  * Name: lck_rw_gate_wait
2896  *
2897  * Description: Waits for the current thread to become the holder of the gate or for the
2898  *              gate to become open. An interruptible mode and deadline can be specified
2899  *              to return earlier from the wait.
2900  *
2901  * Args:
2902  *   Arg1: lck_rw_t lock used to protect the gate.
2903  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
2907  *
2908  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2909  *             Lock will be dropped while waiting.
2910  *             The gate must be closed.
2911  *
2912  * Returns: Reason why the thread was woken up.
2913  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2914  *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
2915  *                         to wake up possible waiters on the gate before returning to userspace.
2916  *          GATE_OPENED - the gate was opened by the holder.
2917  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
2918  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
2919  */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * Captured (__block) by the unlock block so that, in the default
	 * case, the re-lock block can reacquire the lock in the same
	 * read/write mode lck_rw_done() reported it was held in.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * Flag precedence: LCK_SLEEP_UNLOCK wins (return with the lock
	 * dropped), then the "no explicit mode" default (reacquire in the
	 * original mode), then the explicit EXCLUSIVE/SHARED requests.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;}); /* intentionally empty: do not reacquire */
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2953 
2954 /*
2955  * Name: lck_rw_gate_assert
2956  *
2957  * Description: asserts that the gate is in the specified state.
2958  *
2959  * Args:
2960  *   Arg1: lck_rw_t lock used to protect the gate.
2961  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2962  *   Arg3: flags to specified assert type.
2963  *         GATE_ASSERT_CLOSED - the gate is currently closed
2964  *         GATE_ASSERT_OPEN - the gate is currently opened
2965  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2966  */
2967 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2968 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2969 {
2970 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2971 
2972 	gate_assert(gate, flags);
2973 	return;
2974 }
2975 
2976 /*
2977  * Name: lck_mtx_gate_init
2978  *
2979  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2980  *
2981  * Args:
2982  *   Arg1: lck_mtx_t lock used to protect the gate.
2983  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2984  */
2985 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2986 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2987 {
2988 	(void) lock;
2989 	gate_init(gate, GT_INIT_DEFAULT);
2990 }
2991 
2992 /*
2993  * Name: lck_mtx_gate_alloc_init
2994  *
2995  * Description: allocates and initializes a gate_t.
2996  *
2997  * Args:
2998  *   Arg1: lck_mtx_t lock used to protect the gate.
2999  *
3000  * Returns:
3001  *         gate_t allocated.
3002  */
3003 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)3004 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3005 {
3006 	(void) lock;
3007 	return gate_alloc_init();
3008 }
3009 
3010 /*
3011  * Name: lck_mtx_gate_destroy
3012  *
3013  * Description: destroys a variable previously initialized
3014  *              with lck_mtx_gate_init().
3015  *
3016  * Args:
3017  *   Arg1: lck_mtx_t lock used to protect the gate.
3018  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3019  */
3020 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)3021 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3022 {
3023 	(void) lock;
3024 	gate_destroy(gate);
3025 }
3026 
3027 /*
3028  * Name: lck_mtx_gate_free
3029  *
3030  * Description: destroys and tries to free a gate previously allocated
3031  *	        with lck_mtx_gate_alloc_init().
3032  *              The gate free might be delegated to the last thread returning
3033  *              from the gate_wait().
3034  *
3035  * Args:
3036  *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3038  */
3039 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3040 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3041 {
3042 	(void) lock;
3043 	gate_free(gate);
3044 }
3045 
3046 /*
3047  * Name: lck_mtx_gate_try_close
3048  *
3049  * Description: Tries to close the gate.
3050  *              In case of success the current thread will be set as
3051  *              the holder of the gate.
3052  *
3053  * Args:
3054  *   Arg1: lck_mtx_t lock used to protect the gate.
3055  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3056  *
3057  * Conditions: Lock must be held. Returns with the lock held.
3058  *
3059  * Returns:
3060  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3061  *          of the gate.
3062  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3063  *          to wake up possible waiters on the gate before returning to userspace.
3064  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3065  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3066  *
3067  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3068  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3069  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3070  *          be done without dropping the lock that is protecting the gate in between.
3071  */
3072 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3073 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3074 {
3075 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3076 
3077 	return gate_try_close(gate);
3078 }
3079 
3080 /*
3081  * Name: lck_mtx_gate_close
3082  *
3083  * Description: Closes the gate. The current thread will be set as
3084  *              the holder of the gate. Will panic if the gate is already closed.
3085  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3086  *              to wake up possible waiters on the gate before returning to userspace.
3087  *
3088  * Args:
3089  *   Arg1: lck_mtx_t lock used to protect the gate.
3090  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3091  *
3092  * Conditions: Lock must be held. Returns with the lock held.
3093  *             The gate must be open.
3094  *
3095  */
3096 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3097 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3098 {
3099 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3100 
3101 	return gate_close(gate);
3102 }
3103 
3104 /*
3105  * Name: lck_mtx_gate_open
3106  *
 * Description: Opens the gate and wakes up possible waiters.
3108  *
3109  * Args:
3110  *   Arg1: lck_mtx_t lock used to protect the gate.
3111  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3112  *
3113  * Conditions: Lock must be held. Returns with the lock held.
3114  *             The current thread must be the holder of the gate.
3115  *
3116  */
3117 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3118 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3119 {
3120 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3121 
3122 	gate_open(gate);
3123 }
3124 
3125 /*
3126  * Name: lck_mtx_gate_handoff
3127  *
3128  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3129  *              priority will be selected as the new holder of the gate, and woken up,
3130  *              with the gate remaining in the closed state throughout.
3131  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3132  *              will be returned.
3133  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3134  *              case no waiters were found.
3135  *
3136  *
3137  * Args:
3138  *   Arg1: lck_mtx_t lock used to protect the gate.
3139  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3140  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3141  *
3142  * Conditions: Lock must be held. Returns with the lock held.
3143  *             The current thread must be the holder of the gate.
3144  *
3145  * Returns:
3146  *          KERN_SUCCESS in case one of the waiters became the new holder.
3147  *          KERN_NOT_WAITING in case there were no waiters.
3148  *
3149  */
3150 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3151 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3152 {
3153 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3154 
3155 	return gate_handoff(gate, flags);
3156 }
3157 
3158 /*
3159  * Name: lck_mtx_gate_steal
3160  *
3161  * Description: Steals the ownership of the gate. It sets the current thread as the
3162  *              new holder of the gate.
3163  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3164  *              to wake up possible waiters on the gate before returning to userspace.
3165  *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3166  *              anymore.
3167  *
3168  *
3169  * Args:
3170  *   Arg1: lck_mtx_t lock used to protect the gate.
3171  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3172  *
3173  * Conditions: Lock must be held. Returns with the lock held.
3174  *             The gate must be closed and the current thread must not already be the holder.
3175  *
3176  */
3177 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3178 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3179 {
3180 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3181 
3182 	gate_steal(gate);
3183 }
3184 
3185 /*
3186  * Name: lck_mtx_gate_wait
3187  *
3188  * Description: Waits for the current thread to become the holder of the gate or for the
3189  *              gate to become open. An interruptible mode and deadline can be specified
3190  *              to return earlier from the wait.
3191  *
3192  * Args:
3193  *   Arg1: lck_mtx_t lock used to protect the gate.
3194  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
3198  *
3199  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3200  *             Lock will be dropped while waiting.
3201  *             The gate must be closed.
3202  *
3203  * Returns: Reason why the thread was woken up.
3204  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3205  *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3206  *                         to wake up possible waiters on the gate before returning to userspace.
3207  *          GATE_OPENED - the gate was opened by the holder.
3208  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3209  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3210  */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Flag precedence: LCK_SLEEP_UNLOCK wins (return with the mutex
	 * dropped), then LCK_SLEEP_SPIN, then LCK_SLEEP_SPIN_ALWAYS,
	 * then the default full-mutex reacquire.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;}); /* intentionally empty: do not reacquire */
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3242 
3243 /*
3244  * Name: lck_mtx_gate_assert
3245  *
3246  * Description: asserts that the gate is in the specified state.
3247  *
3248  * Args:
3249  *   Arg1: lck_mtx_t lock used to protect the gate.
3250  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3251  *   Arg3: flags to specified assert type.
3252  *         GATE_ASSERT_CLOSED - the gate is currently closed
3253  *         GATE_ASSERT_OPEN - the gate is currently opened
3254  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3255  */
3256 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3257 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3258 {
3259 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3260 
3261 	gate_assert(gate, flags);
3262 }
3263 
3264 #pragma mark - LCK_*_DECLARE support
3265 
__startup_func
void
lck_spin_startup_init(struct lck_spin_startup_spec *sp)
{
	/* Boot-time initializer for spin locks declared via LCK_*_DECLARE specs. */
	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3272 
__startup_func
void
lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
{
	/* Boot-time initializer for mutexes declared via LCK_*_DECLARE specs. */
	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3279 
__startup_func
void
lck_rw_startup_init(struct lck_rw_startup_spec *sp)
{
	/* Boot-time initializer for rw locks declared via LCK_*_DECLARE specs. */
	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
}
3286 
__startup_func
void
usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
{
	/* Boot-time initializer for simple locks declared via startup specs. */
	simple_lock_init(sp->lck, sp->lck_init_arg);
}
3293 
__startup_func
void
lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
{
	/* Boot-time initializer for ticket locks declared via startup specs. */
	lck_ticket_init(sp->lck, sp->lck_grp);
}
3300