/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>

#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <libkern/section_keywords.h>
#if defined(__x86_64__)
#include <i386/tsc.h>
#include <i386/machine_routines.h>
#endif
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <string.h>
#include <vm/pmap.h>

#include <sys/kdebug.h>

#define LCK_MTX_SLEEP_CODE              0
#define LCK_MTX_SLEEP_DEADLINE_CODE     1
#define LCK_MTX_LCK_WAIT_CODE           2
#define LCK_MTX_UNLCK_WAKEUP_CODE       3

// Panic in tests that check lock usage correctness.
// These panics are undesirable when already in a panic or while a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

#if MACH_LDEBUG
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p, t) do{}while(0)
#endif

#define NOINLINE                __attribute__((noinline))

#define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)

KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);

#if CONFIG_PV_TICKET
SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
#endif

#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", LCK_OPTION_ENABLE_DEBUG);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

#if CONFIG_DTRACE
#if defined (__x86_64__)
machine_timeout_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm64__)
MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif

struct lck_mcs PERCPU_DATA(lck_mcs);

__kdebug_only
uintptr_t
unslide_for_kdebug(const void *object)
{
	if (__improbable(kdebug_enable)) {
		return VM_KERNEL_UNSLIDE_OR_PERM(object);
	} else {
		return 0;
	}
}

static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}

static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}

#pragma mark - HW Spin policies

/*
 * Input and output timeouts are expressed in absolute_time ticks on ARM
 * and TSC ticks on Intel.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}

__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}

bool
hw_spin_should_keep_spinning(
	void                   *lock,
	hw_spin_policy_t        pol,
	hw_spin_timeout_t       to,
	hw_spin_state_t        *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	if (__improbable(to.hwst_timeout == 0)) {
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		state->hwss_start     = now;
		state->hwss_deadline  = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now     = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
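
/*
 * Illustrative sketch (not part of the original file): how callers in this
 * file drive hw_spin_compute_timeout() and hw_spin_should_keep_spinning().
 * `example_try_acquire` and `example_lock` are hypothetical; real users are
 * functions such as hw_lock_lock_contended() below.
 */
#if 0
extern bool example_try_acquire(void *lock);    /* hypothetical */

static bool
example_spin_until_acquired(void *example_lock, hw_spin_policy_t pol)
{
	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };

	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (example_try_acquire(example_lock)) {
				return true;
			}
		}
		/*
		 * Returns true to keep spinning; on timeout it invokes the
		 * policy's hwsp_op_timeout callback, which may panic, push
		 * the deadline, or ask the caller to give up.
		 */
	} while (hw_spin_should_keep_spinning(example_lock, pol, to, &state));

	return false;
}
#endif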

__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}

__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}

lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}

#pragma mark - HW locks

/*
 * Routine:	hw_lock_init
 *
 *	Initialize a hardware lock.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}

__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}

__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
		                os_atomic_clear_exclusive();
			}
#else
		        if (wait) {
		                cpu_pause();
			}
#endif
		        return false;
		}
	});
	return true;
#else
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}

static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock  = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul;
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name              = "hw_lock_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_spin_timeout_panic,
};

static hw_spin_timeout_status_t
hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
#pragma unused(_lock, to, st)
	return HW_LOCK_TIMEOUT_RETURN;
}

const struct hw_spin_policy hw_lock_spin_panic_policy = {
	.hwsp_name              = "hw_lock_t[panic]",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};

#if DEBUG || DEVELOPMENT
static machine_timeout_t hw_lock_test_to;
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name              = "testing policy",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};

__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif

static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};

#if __arm64__
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout           = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
#endif
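
/*
 * Illustrative sketch (not part of the original file): what a subsystem-
 * specific spin policy could look like, modeled on the policies above.
 * `example_timeout` and the policy name are hypothetical.
 */
#if 0
static machine_timeout_t example_timeout;       /* initialized at startup */

static const struct hw_spin_policy example_spin_policy = {
	.hwsp_name              = "example_lock_t",
	.hwsp_timeout_atomic    = &example_timeout,
	.hwsp_timeout_shift     = 0,    /* timeout used as-is */
	.hwsp_op_timeout        = hw_spin_timeout_panic, /* panic on timeout */
};
#endif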

/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	Timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t               lock,
	uintptr_t               data,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code can run in the PPL, which can't write to globals;
		 * only record the original owner when not in the PPL.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}

static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name              = "hw_wait_while_equals32",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals32_panic,
};

static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name              = "hw_wait_while_equals64",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals64_panic,
};

uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t  pol   = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint32_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}

uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t  pol   = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint64_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
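
/*
 * Illustrative sketch (not part of the original file): waiting for another
 * CPU to publish a nonzero value with hw_wait_while_equals32(). The
 * `example_state` variable is hypothetical.
 */
#if 0
static uint32_t example_state;  /* 0 == not ready, written by another CPU */

static uint32_t
example_wait_for_ready(void)
{
	/* spins (under the panic-timeout policy above) until the value changes */
	return hw_wait_while_equals32(&example_state, 0);
}
#endif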

__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t               lock,
	thread_t                thread,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_lock_nopreempt
 *
 *	Acquire lock, spinning until it becomes available.
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or times out.
 *	Timeout is in mach_absolute_time ticks (TSC on Intel), return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_to_nopreempt
 *
 *	Acquire lock, spinning until it becomes available or times out.
 *	Timeout is in mach_absolute_time ticks; called and returns with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}

__result_use_check
static inline unsigned int
hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(lock_cmpxchg(&lock->lock_data, 0,
	    HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return true;
	}
	return false;
}

/*
 *	Routine: hw_lock_try
 *
 *	Returns with preemption disabled on success.
 */
unsigned
int
(hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
	if (!success) {
		lock_enable_preemption();
	}
	return success;
}

unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}

#if DEBUG || DEVELOPMENT
__abortlike
static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)
{
	panic("hwlock: thread %p is trying to unlock lock %p it does not own",
	    current_thread(), lock);
}
#endif /* DEBUG || DEVELOPMENT */

/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	os_atomic_store(&lock->lock_data, 0, release);
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}

void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}

void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
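
/*
 * Illustrative sketch (not part of the original file): the basic hw_lock
 * lifecycle using the routines above. `example_hw_lock` and `example_grp`
 * are hypothetical; real callers pass their own lock group where lock
 * statistics are configured.
 */
#if 0
LCK_GRP_DECLARE(example_grp, "example");
static hw_lock_data_t example_hw_lock;

static void
example_hw_lock_usage(void)
{
	hw_lock_init(&example_hw_lock);

	/* spins until acquired; returns with preemption disabled */
	hw_lock_lock(&example_hw_lock, &example_grp);
	/* ... critical section, preemption stays disabled ... */
	hw_lock_unlock(&example_hw_lock);       /* re-enables preemption */

	/* non-blocking attempt; holds preemption disabled only on success */
	if (hw_lock_try(&example_hw_lock, &example_grp)) {
		hw_lock_unlock(&example_hw_lock);
	}
}
#endif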

void
hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
{
#if MACH_ASSERT
	thread_t thread, holder;

	holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
	thread = current_thread();

	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("hw_lock_assert(): invalid arg (%u)", type);
	}
#endif /* MACH_ASSERT */
}

/*
 *	Routine: hw_lock_held, doesn't change preemption state.
 *	N.B.  Racy, of course.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return ordered_load_hw(lock) != 0;
}

static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t          *lock,
	uint32_t                bit,
	hw_spin_policy_t        pol,
	bool (^lock_pause)(void)
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			rc = (hw_lock_trylock_bit(lock, bit, true) ? HW_LOCK_ACQUIRED : HW_LOCK_CONTENDED);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}

			if (__improbable(lock_pause && lock_pause())) {
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}

__result_use_check
static inline hw_lock_status_t
hw_lock_bit_to_internal(
	hw_lock_bit_t          *lock,
	unsigned int            bit,
	hw_spin_policy_t        pol,
	bool (^lock_pause)(void)
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_bit_to_contended(lock, bit, pol, lock_pause LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_bit_to
 *
 *	Acquire bit lock, spinning until it becomes available or times out.
 *	Timeout is in mach_absolute_time ticks (TSC on Intel), return with
 *	preemption disabled.
 */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t          * lock,
	uint32_t                bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return (unsigned int)hw_lock_bit_to_internal(lock, bit, pol, NULL LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_bit
 *
 *	Acquire bit lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy, NULL LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_bit_nopreempt
 *
 *	Acquire bit lock with preemption already disabled, spinning until it becomes available.
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy, NULL LCK_GRP_ARG(grp));
}

/*
 *	Routine: hw_lock_bit_to_b
 *
 *	Acquire bit lock, spinning until it becomes available, times out,
 *	or the supplied lock_pause callout returns true.
 *	Timeout is in mach_absolute_time ticks (TSC on Intel), return with
 *	preemption disabled iff the lock is successfully acquired.
 */
hw_lock_status_t
(hw_lock_bit_to_b)(
	hw_lock_bit_t          * lock,
	uint32_t                bit,
	hw_spin_policy_t        pol,
	bool (^lock_pause) (void)
	LCK_GRP_ARG(lck_grp_t * grp))
{
	_disable_preemption();
	hw_lock_status_t ret = hw_lock_bit_to_internal(lock, bit, pol, lock_pause LCK_GRP_ARG(grp));
	if (ret != HW_LOCK_ACQUIRED) {
		lock_enable_preemption();
	}
	return ret;
}


bool
(hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	bool success = false;

	_disable_preemption();
	success = hw_lock_trylock_bit(lock, bit, false);
	if (!success) {
		lock_enable_preemption();
	}

	if (success) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
	}

	return success;
}

static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}

/*
 *	Routine:	hw_unlock_bit
 *
 *		Release bit lock; the second parameter is the bit number to clear.
 *		Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}

void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
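
/*
 * Illustrative sketch (not part of the original file): using one bit of a
 * 32-bit word as a spinlock with the routines above. `example_flags`,
 * `example_grp` and `EXAMPLE_LOCK_BIT` are hypothetical.
 */
#if 0
#define EXAMPLE_LOCK_BIT 0              /* bit used as the lock */

LCK_GRP_DECLARE(example_grp, "example");
static hw_lock_bit_t example_flags;     /* remaining bits free for data */

static void
example_bit_lock_usage(void)
{
	hw_lock_bit(&example_flags, EXAMPLE_LOCK_BIT, &example_grp);
	/* ... critical section, preemption disabled ... */
	hw_unlock_bit(&example_flags, EXAMPLE_LOCK_BIT);
}
#endif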


#pragma mark - lck_*_sleep

/*
 * Routine:	lck_spin_sleep
 */
wait_result_t
lck_spin_sleep_grp(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	lck_grp_t               *grp)
{
	wait_result_t   res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock_grp(lck, grp);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}

wait_result_t
lck_spin_sleep(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}

/*
 * Routine:	lck_spin_sleep_deadline
 */
wait_result_t
lck_spin_sleep_deadline(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	wait_result_t   res;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_spin_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			lck_spin_lock(lck);
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_spin_unlock(lck);
	}

	return res;
}

/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	thread_pri_floor_t      token;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor during the time that this thread
		 * is asleep, so that when it is re-awakened (and not yet
		 * contending on the mutex), it is runnable at a reasonably
		 * high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
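
/*
 * Illustrative sketch (not part of the original file): a classic
 * wait/wakeup pattern built on lck_mtx_sleep(). `example_mtx` (assumed to
 * be initialized elsewhere with lck_mtx_init()), `example_ready` and the
 * wakeup site are hypothetical.
 */
#if 0
static lck_mtx_t example_mtx;
static bool example_ready;

static void
example_wait_until_ready(void)
{
	lck_mtx_lock(&example_mtx);
	while (!example_ready) {
		/* atomically drops and re-takes example_mtx around the sleep */
		(void)lck_mtx_sleep(&example_mtx, LCK_SLEEP_DEFAULT,
		    (event_t)&example_ready, THREAD_UNINT);
	}
	lck_mtx_unlock(&example_mtx);
}

static void
example_signal_ready(void)
{
	lck_mtx_lock(&example_mtx);
	example_ready = true;
	lck_mtx_unlock(&example_mtx);
	thread_wakeup((event_t)&example_ready);
}
#endif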


/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	wait_result_t           res;
	thread_pri_floor_t      token;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}

/*
 * sleep_with_inheritor and wakeup_with_inheritor KPI
 *
 * Functions that allow a thread to sleep on an event and use a turnstile to
 * propagate the priority of the sleeping threads to the latest thread
 * specified as inheritor.
 *
 * Inheritor management is delegated to the caller: the caller needs to store
 * a thread identifier to provide to these functions, specifying the thread
 * at which the push should be directed. The inheritor cannot return to user
 * space or exit while holding a push from an event, so it is the caller's
 * responsibility to call wakeup_with_inheritor from the inheritor before it
 * runs in userspace, or to specify another inheritor before letting the old
 * inheritor run in userspace.
 *
 * sleep_with_inheritor requires a locking primitive to be held when invoked;
 * wakeup_with_inheritor and change_sleep_inheritor do not.
 *
 * Turnstiles require a non-blocking primitive as interlock to synchronize
 * manipulation of the turnstile data structure; therefore sleep_with_inheritor,
 * change_sleep_inheritor and wakeup_with_inheritor all require the same
 * interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can
 * block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive
 * is required before invoking any turnstile operation.
 *
 * All functions save the turnstile associated with the event in the turnstile
 * kernel hash table and use the turnstile kernel hash table bucket spinlock
 * as the turnstile interlock. Because we do not want to keep interrupts
 * disabled while holding the bucket interlock, a separate turnstile kernel
 * hash table is instantiated for this KPI to manage the hash without
 * interrupts disabled.
 * Also:
 * - all events on the system that hash to the same bucket will contend on the
 *   same spinlock.
 * - every event will have a dedicated wait_queue.
 *
 * Different locking primitives can be associated with sleep_with_inheritor as
 * long as the primitive_lock() and primitive_unlock() functions are provided
 * to sleep_with_inheritor_turnstile to perform the handoff with the bucket
 * spinlock.
 */
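
/*
 * Illustrative sketch (not part of the original file): the typical pairing
 * of lck_mtx_sleep_with_inheritor() and wakeup_one_with_inheritor(). The
 * mutex, event and owner bookkeeping are hypothetical.
 */
#if 0
static lck_mtx_t example_mtx;
static thread_t  example_owner;  /* current inheritor, managed by the caller */

static void
example_wait_for_owner(void)
{
	lck_mtx_lock(&example_mtx);
	while (example_owner != THREAD_NULL) {
		/* pushes this thread's priority onto example_owner while asleep */
		(void)lck_mtx_sleep_with_inheritor(&example_mtx,
		    LCK_SLEEP_DEFAULT, (event_t)&example_owner,
		    example_owner, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	}
	example_owner = current_thread();
	lck_mtx_unlock(&example_mtx);
}

static void
example_release_ownership(void)
{
	lck_mtx_lock(&example_mtx);
	example_owner = THREAD_NULL;
	lck_mtx_unlock(&example_mtx);
	/* the inheritor must wake waiters before returning to userspace */
	(void)wakeup_one_with_inheritor((event_t)&example_owner,
	    THREAD_AWAKENED, LCK_WAKE_DEFAULT, NULL);
}
#endif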


typedef enum {
	LCK_WAKEUP_THREAD,
	LCK_WAKEUP_ONE,
	LCK_WAKEUP_ALL
} lck_wakeup_type_t;

static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t                 event,
	wait_result_t           result,
	lck_wakeup_type_t       wake_type,
	lck_wake_action_t       action,
	thread_t               *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	switch (wake_type) {
	case LCK_WAKEUP_ONE: {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		if (thread_wokenup) {
			thread_t wokeup;

			wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
			*thread_wokenup = wokeup;
			ret = wokeup ? KERN_SUCCESS : KERN_NOT_WAITING;
		} else {
			ret = waitq_wakeup64_one(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
		}
		if (ret == KERN_SUCCESS && action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
			goto complete;
		}
		if (ret == KERN_NOT_WAITING) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
		break;
	}
	case LCK_WAKEUP_ALL: {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
		break;
	}
	case LCK_WAKEUP_THREAD: {
		assert(thread_wokenup);
		ret = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(event),
		    *thread_wokenup, result);
		break;
	}
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor is either NULL or a thread that has just been
	 * woken up and has not blocked yet, because it is racing with the same interlock
	 * used here after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor, the old inheritor
	 * can be blocked and require a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We could optimize this by asking the turnstile to tell us whether the old
	 * inheritor needs an update, and drop the lock only in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}

static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline,
	void                  (^primitive_lock)(void),
	void                  (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock.
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	primitive_lock();

	return ret;
}

/*
 * change_sleep_inheritor is independent of the locking primitive.
 */

/*
 * Name: change_sleep_inheritor
 *
 * Description: Redirect the push of the threads waiting on the event to the newly specified inheritor.
 *
 * Args:
 *   Arg1: event to redirect the push of.
 *   Arg2: new inheritor for the event.
 *
 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
 *
 * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
 *             wakeup for the event is called.
 *             NOTE: this cannot be called from interrupt context.
 */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
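
/*
 * Illustrative sketch (not part of the original file): handing the push for
 * an event over to a new owner thread with change_sleep_inheritor(). The
 * `example_event` and `new_owner` names are hypothetical.
 */
#if 0
static void
example_transfer_push(event_t example_event, thread_t new_owner)
{
	/* waiters now push on new_owner instead of the previous inheritor */
	if (change_sleep_inheritor(example_event, new_owner) == KERN_NOT_WAITING) {
		/* no waiters: new_owner holds no push and may run in userspace */
	}
}
#endif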

wait_result_t
lck_spin_sleep_with_inheritor(
	lck_spin_t *lock,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{}, ^{ lck_spin_unlock(lock); });
	} else {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
	}
}

wait_result_t
hw_lck_ticket_sleep_with_inheritor(
	hw_lck_ticket_t *lock,
	lck_grp_t *grp __unused,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{}, ^{ hw_lck_ticket_unlock(lock); });
	} else {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
	}
}

wait_result_t
lck_ticket_sleep_with_inheritor(
	lck_ticket_t *lock,
	lck_grp_t *grp,
	lck_sleep_action_t lck_sleep_action,
	event_t event,
	thread_t inheritor,
	wait_interrupt_t interruptible,
	uint64_t deadline)
{
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{}, ^{ lck_ticket_unlock(lock); });
	} else {
		return sleep_with_inheritor_and_turnstile(event, inheritor,
		           interruptible, deadline,
		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
	}
}

wait_result_t
lck_mtx_sleep_with_inheritor(
	lck_mtx_t              *lock,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{;},
		           ^{lck_mtx_unlock(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock_spin(lock);},
		           ^{lck_mtx_unlock(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock_spin_always(lock);},
		           ^{lck_mtx_unlock(lock);});
	} else {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_mtx_lock(lock);},
		           ^{lck_mtx_unlock(lock);});
	}
}

/*
 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
 */

wait_result_t
lck_rw_sleep_with_inheritor(
	lck_rw_t               *lock,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{;},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock(lock, lck_rw_type);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock_exclusive(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else {
		return sleep_with_inheritor_and_turnstile(event,
		           inheritor,
		           interruptible,
		           deadline,
		           ^{lck_rw_lock_shared(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	}
}
1636 
1637 /*
1638  * wakeup_with_inheritor functions are independent from the locking primitive.
1639  */
1640 
1641 kern_return_t
wakeup_thread_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t thread_towake)1642 wakeup_thread_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t thread_towake)
1643 {
1644 	return wakeup_with_inheritor_and_turnstile(event,
1645 	           result,
1646 	           LCK_WAKEUP_THREAD,
1647 	           action,
1648 	           &thread_towake);
1649 }
1650 
1651 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1652 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1653 {
1654 	return wakeup_with_inheritor_and_turnstile(event,
1655 	           result,
1656 	           LCK_WAKEUP_ONE,
1657 	           action,
1658 	           thread_wokenup);
1659 }
1660 
1661 kern_return_t
1662 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1663 {
1664 	return wakeup_with_inheritor_and_turnstile(event,
1665 	           result,
1666 	           LCK_WAKEUP_ALL,
1667 	           0,
1668 	           NULL);
1669 }
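
/*
 * Editorial sketch (not part of the xnu source above): how a caller might
 * pair the sleep/wakeup-with-inheritor primitives. Everything named
 * example_* is hypothetical; lck_mtx_sleep_with_inheritor(),
 * wakeup_one_with_inheritor(), THREAD_UNINT, THREAD_AWAKENED,
 * LCK_WAKE_DEFAULT and TIMEOUT_WAIT_FOREVER are existing xnu API.
 */
#if 0 /* illustrative only */
static lck_mtx_t example_lock;   /* assumed initialized elsewhere */
static bool      example_busy;
static thread_t  example_owner;  /* thread currently doing the work */

static void
example_wait_until_idle(void)
{
	lck_mtx_lock(&example_lock);
	while (example_busy) {
		/* The owner inherits our priority push while we sleep. */
		lck_mtx_sleep_with_inheritor(&example_lock, LCK_SLEEP_DEFAULT,
		    (event_t)&example_busy, example_owner,
		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	}
	lck_mtx_unlock(&example_lock);
}

static void
example_mark_idle(void)
{
	thread_t woken = NULL;

	lck_mtx_lock(&example_lock);
	example_busy = false;
	/* Transfer the push to the highest priority waiter, if any. */
	wakeup_one_with_inheritor((event_t)&example_busy, THREAD_AWAKENED,
	    LCK_WAKE_DEFAULT, &woken);
	lck_mtx_unlock(&example_lock);
	if (woken != NULL) {
		thread_deallocate(woken);  /* the wakeup acquired a ref for us */
	}
}
#endif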
1670 
1671 void
1672 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
1673 {
1674 	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
1675 	assert(waitq_type(waitq) == WQT_TURNSTILE);
1676 	waitinfo->owner = 0;
1677 	waitinfo->context = 0;
1678 
1679 	if (waitq_held(waitq)) {
1680 		return;
1681 	}
1682 
1683 	struct turnstile *turnstile = waitq_to_turnstile(waitq);
1684 	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
1685 	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
1686 }
1687 
1688 static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
1689 static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
1690 static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1691 
1692 static wait_result_t
1693 cond_sleep_with_inheritor_and_turnstile_type(
1694 	cond_swi_var_t cond,
1695 	bool (^cond_sleep_check)(ctid_t*),
1696 	wait_interrupt_t interruptible,
1697 	uint64_t deadline,
1698 	turnstile_type_t type)
1699 {
1700 	wait_result_t ret;
1701 	uint32_t index;
1702 	struct turnstile *ts = NULL;
1703 	ctid_t ctid = 0;
1704 	thread_t inheritor;
1705 
1706 	/*
1707 	 * The hash bucket spinlock is used as the turnstile interlock;
1708 	 * lock it before checking the sleep condition.
1709 	 */
1710 	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);
1711 
1712 	/*
1713 	 * In case the sleep check succeeds, the block will
1714 	 * provide us the ctid observed on the variable.
1715 	 */
1716 	if (!cond_sleep_check(&ctid)) {
1717 		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1718 		return THREAD_NOT_WAITING;
1719 	}
1720 
1721 	/*
1722 	 * We can translate the ctid to a thread_t only
1723 	 * if cond_sleep_check succeeded.
1724 	 */
1725 	inheritor = ctid_get_thread(ctid);
1726 	assert(inheritor != NULL);
1727 
1728 	ts = turnstile_prepare_hash((uintptr_t)cond, type);
1729 
1730 	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
1731 	/*
1732 	 * We need TURNSTILE_DELAYED_UPDATE because we will call
1733 	 * waitq_assert_wait64 after.
1734 	 */
1735 	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1736 
1737 	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);
1738 
1739 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1740 
1741 	/*
1742 	 * Update new and old inheritor chains outside the interlock.
1743 	 */
1744 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1745 	if (ret == THREAD_WAITING) {
1746 		ret = thread_block(THREAD_CONTINUE_NULL);
1747 	}
1748 
1749 	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1750 
1751 	turnstile_complete_hash((uintptr_t)cond, type);
1752 
1753 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1754 
1755 	turnstile_cleanup();
1756 	return ret;
1757 }
1758 
1759 /*
1760  * Name: cond_sleep_with_inheritor32_mask
1761  *
1762  * Description: Conditionally sleeps with inheritor, with a 32-bit condition variable.
1763  *              Allows a thread to conditionally sleep while indicating which thread should
1764  *              inherit the priority push associated with the condition.
1765  *              The condition should be expressed through a cond_swi_var32_s pointer.
1766  *              The condition needs to be populated by the caller with the ctid of the
1767  *              thread that should inherit the push. The remaining bits of the condition
1768  *              can be used by the caller to implement its own synchronization logic.
1769  *              A copy of the condition value observed by the caller when it decided to call
1770  *              this function should be provided to prevent races with matching wakeups.
1771  *              This function will atomically check the value stored in the condition against
1772  *              the expected/observed one provided only for the bits that are set in the mask.
1773  *              If the check doesn't pass the thread will not sleep and the function will return.
1774  *              The ctid provided in the condition will be used only after a successful
1775  *              check.
1776  *
1777  * Args:
1778  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1779  *   Arg2: cond_swi_var32_s observed value checked to decide whether to sleep.
1780  *   Arg3: mask to apply to the condition to check.
1781  *   Arg4: interruptible flag for wait.
1782  *   Arg5: deadline for wait.
1783  *
1784  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1785  *             wakeup for the cond is called.
1786  *
1787  * Returns: result of the wait.
1788  */
1789 static wait_result_t
1790 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1791 {
1792 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1793 		cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1794 		bool ret;
1795 		if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1796 			ret = true;
1797 			*ctid = cond_val.cond32_owner;
1798 		} else {
1799 			ret = false;
1800 		}
1801 		return ret;
1802 	};
1803 
1804 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1805 }
1806 
1807 /*
1808  * Name: cond_sleep_with_inheritor64_mask
1809  *
1810  * Description: Conditionally sleeps with inheritor, with a 64-bit condition variable.
1811  *              Allows a thread to conditionally sleep while indicating which thread should
1812  *              inherit the priority push associated with the condition.
1813  *              The condition should be expressed through a cond_swi_var64_s pointer.
1814  *              The condition needs to be populated by the caller with the ctid of the
1815  *              thread that should inherit the push. The remaining bits of the condition
1816  *              can be used by the caller to implement its own synchronization logic.
1817  *              A copy of the condition value observed by the caller when it decided to call
1818  *              this function should be provided to prevent races with matching wakeups.
1819  *              This function will atomically check the value stored in the condition against
1820  *              the expected/observed one provided only for the bits that are set in the mask.
1821  *              If the check doesn't pass the thread will not sleep and the function will return.
1822  *              The ctid provided in the condition will be used only after a successful
1823  *              check.
1824  *
1825  * Args:
1826  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1827  *   Arg2: cond_swi_var64_s observed value checked to decide whether to sleep.
1828  *   Arg3: mask to apply to the condition to check.
1829  *   Arg4: interruptible flag for wait.
1830  *   Arg5: deadline for wait.
1831  *
1832  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1833  *             wakeup for the cond is called.
1834  *
1835  * Returns: result of the wait.
1836  */
1837 wait_result_t
1838 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1839 {
1840 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1841 		cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1842 		bool ret;
1843 		if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1844 			ret = true;
1845 			*ctid = cond_val.cond64_owner;
1846 		} else {
1847 			ret = false;
1848 		}
1849 		return ret;
1850 	};
1851 
1852 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1853 }
1854 
1855 /*
1856  * Name: cond_sleep_with_inheritor32
1857  *
1858  * Description: Conditionally sleeps with inheritor, with a 32-bit condition variable.
1859  *              Allows a thread to conditionally sleep while indicating which thread should
1860  *              inherit the priority push associated with the condition.
1861  *              The condition should be expressed through a cond_swi_var32_s pointer.
1862  *              The condition needs to be populated by the caller with the ctid of the
1863  *              thread that should inherit the push. The remaining bits of the condition
1864  *              can be used by the caller to implement its own synchronization logic.
1865  *              A copy of the condition value observed by the caller when it decided to call
1866  *              this function should be provided to prevent races with matching wakeups.
1867  *              This function will atomically check the value stored in the condition against
1868  *              the expected/observed one provided. If the check doesn't pass the thread will not
1869  *              sleep and the function will return.
1870  *              The ctid provided in the condition will be used only after a successful
1871  *              check.
1872  *
1873  * Args:
1874  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1875  *   Arg2: cond_swi_var32_s observed value checked to decide whether to sleep.
1876  *   Arg3: interruptible flag for wait.
1877  *   Arg4: deadline for wait.
1878  *
1879  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1880  *             wakeup for the cond is called.
1881  *
1882  * Returns: result of the wait.
1883  */
1884 wait_result_t
1885 cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1886 {
1887 	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
1888 }
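
/*
 * Editorial sketch (not part of the xnu source above): a possible 32-bit
 * condition word for the API just defined. The cond32_data field name
 * comes from the unions used in this file; the example_* names and the
 * flag layout are hypothetical.
 */
#if 0 /* illustrative only */
static uint32_t example_cond;  /* low bits: owner ctid; caller flags above */

static wait_result_t
example_cond_wait(uint32_t observed)
{
	cond_swi_var32_s expected = { .cond32_data = observed };

	/*
	 * Sleeps only if the word still matches 'observed'; the thread
	 * whose ctid is packed in the word inherits our push.
	 */
	return cond_sleep_with_inheritor32((cond_swi_var_t)&example_cond,
	           expected, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
}

static void
example_cond_broadcast(void)
{
	/* Wake every waiter; the old inheritor loses the push. */
	cond_wakeup_all_with_inheritor((cond_swi_var_t)&example_cond,
	    THREAD_AWAKENED);
}
#endif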
1889 
1890 /*
1891  * Name: cond_sleep_with_inheritor64
1892  *
1893  * Description: Conditionally sleeps with inheritor, with a 64-bit condition variable.
1894  *              Allows a thread to conditionally sleep while indicating which thread should
1895  *              inherit the priority push associated with the condition.
1896  *              The condition should be expressed through a cond_swi_var64_s pointer.
1897  *              The condition needs to be populated by the caller with the ctid of the
1898  *              thread that should inherit the push. The remaining bits of the condition
1899  *              can be used by the caller to implement its own synchronization logic.
1900  *              A copy of the condition value observed by the caller when it decided to call
1901  *              this function should be provided to prevent races with matching wakeups.
1902  *              This function will atomically check the value stored in the condition against
1903  *              the expected/observed one provided. If the check doesn't pass the thread will not
1904  *              sleep and the function will return.
1905  *              The ctid provided in the condition will be used only after a successful
1906  *              check.
1907  *
1908  * Args:
1909  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1910  *   Arg2: cond_swi_var64_s observed value checked to decide whether to sleep.
1911  *   Arg3: interruptible flag for wait.
1912  *   Arg4: deadline for wait.
1913  *
1914  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1915  *             wakeup for the cond is called.
1916  *
1917  * Returns: result of the wait.
1918  */
1919 wait_result_t
1920 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1921 {
1922 	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1923 }
1924 
1925 /*
1926  * Name: cond_wakeup_one_with_inheritor
1927  *
1928  * Description: Wake up one waiter waiting on the condition (if any).
1929  *              The thread woken up will be the one with the higher sched priority waiting on the condition.
1930  *              The push for the condition will be transferred from the last inheritor to the woken up thread.
1931  *
1932  * Args:
1933  *   Arg1: condition to wake from.
1934  *   Arg2: wait result to pass to the woken up thread.
1935  *   Arg3: pointer for storing the thread woken up.
1936  *
1937  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1938  *
1939  * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
1940  *             condition or a wakeup for the event is called.
1941  *             A reference for the woken-up thread is acquired.
1942  *             NOTE: this cannot be called from interrupt context.
1943  */
1944 kern_return_t
1945 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1946 {
1947 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1948 	           result,
1949 	           LCK_WAKEUP_ONE,
1950 	           action,
1951 	           thread_wokenup);
1952 }
1953 
1954 /*
1955  * Name: cond_wakeup_all_with_inheritor
1956  *
1957  * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1958  *
1959  * Args:
1960  *   Arg1: condition to wake from.
1961  *   Arg2: wait result to pass to the woken up threads.
1962  *
1963  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1964  *
1965  * Conditions: NOTE: this cannot be called from interrupt context.
1966  */
1967 kern_return_t
1968 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1969 {
1970 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1971 	           result,
1972 	           LCK_WAKEUP_ALL,
1973 	           0,
1974 	           NULL);
1975 }
1976 
1977 
1978 #pragma mark - gates
1979 
1980 #define GATE_TYPE        3
1981 #define GATE_ILOCK_BIT   0
1982 #define GATE_WAITERS_BIT 1
1983 
1984 #define GATE_ILOCK (1 << GATE_ILOCK_BIT)
1985 #define GATE_WAITERS (1 << GATE_WAITERS_BIT)
1986 
1987 #define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
1988 #define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
1989 #define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
1990 #define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
1991 #define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)
1992 
1993 #define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
1994 #define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
1995 #define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
1996 #define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))
1997 
1998 #define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)
1999 
2000 #define GATE_EVENT(gate)     ((event_t) gate)
2001 #define EVENT_TO_GATE(event) ((gate_t *) event)
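
/*
 * Editorial note (not part of the xnu source above): gt_data packs the
 * holder thread_t together with the two low flag bits, presumably safe
 * because thread pointers are aligned well past 4 bytes. A decode under
 * that assumption, using only the macros defined here:
 *
 *     uintptr_t state   = ordered_load_gate(gate);
 *     thread_t  holder  = GATE_STATE_TO_THREAD(state);  // state & ~3
 *     bool      waiters = gate_has_waiter_bit(state);   // state & GATE_WAITERS
 *     bool      locked  = (state & GATE_ILOCK) != 0;
 */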
2002 
2003 typedef void (*void_func_void)(void);
2004 
2005 __abortlike
2006 static void
2007 gate_verify_tag_panic(gate_t *gate)
2008 {
2009 	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2010 }
2011 
2012 __abortlike
2013 static void
2014 gate_verify_destroy_panic(gate_t *gate)
2015 {
2016 	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2017 }
2018 
2019 static void
2020 gate_verify(gate_t *gate)
2021 {
2022 	if (gate->gt_type != GATE_TYPE) {
2023 		gate_verify_tag_panic(gate);
2024 	}
2025 	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
2026 		gate_verify_destroy_panic(gate);
2027 	}
2028 
2029 	assert(gate->gt_refs > 0);
2030 }
2031 
2032 __abortlike
2033 static void
2034 gate_already_owned_panic(gate_t *gate, thread_t holder)
2035 {
2036 	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
2037 }
2038 
2039 static kern_return_t
2040 gate_try_close(gate_t *gate)
2041 {
2042 	uintptr_t state;
2043 	thread_t holder;
2044 	kern_return_t ret;
2045 	thread_t thread = current_thread();
2046 
2047 	gate_verify(gate);
2048 
2049 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2050 		return KERN_SUCCESS;
2051 	}
2052 
2053 	gate_ilock(gate);
2054 	state = ordered_load_gate(gate);
2055 	holder = GATE_STATE_TO_THREAD(state);
2056 
2057 	if (holder == NULL) {
2058 		assert(gate_has_waiter_bit(state) == FALSE);
2059 
2060 		state = GATE_THREAD_TO_STATE(current_thread());
2061 		state |= GATE_ILOCK;
2062 		ordered_store_gate(gate, state);
2063 		ret = KERN_SUCCESS;
2064 	} else {
2065 		if (holder == current_thread()) {
2066 			gate_already_owned_panic(gate, holder);
2067 		}
2068 		ret = KERN_FAILURE;
2069 	}
2070 
2071 	gate_iunlock(gate);
2072 	return ret;
2073 }
2074 
2075 static void
2076 gate_close(gate_t* gate)
2077 {
2078 	uintptr_t state;
2079 	thread_t holder;
2080 	thread_t thread = current_thread();
2081 
2082 	gate_verify(gate);
2083 
2084 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2085 		return;
2086 	}
2087 
2088 	gate_ilock(gate);
2089 	state = ordered_load_gate(gate);
2090 	holder = GATE_STATE_TO_THREAD(state);
2091 
2092 	if (holder != NULL) {
2093 		gate_already_owned_panic(gate, holder);
2094 	}
2095 
2096 	assert(gate_has_waiter_bit(state) == FALSE);
2097 
2098 	state = GATE_THREAD_TO_STATE(thread);
2099 	state |= GATE_ILOCK;
2100 	ordered_store_gate(gate, state);
2101 
2102 	gate_iunlock(gate);
2103 }
2104 
2105 static void
2106 gate_open_turnstile(gate_t *gate)
2107 {
2108 	struct turnstile *ts = NULL;
2109 
2110 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
2111 	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2112 	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
2113 	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
2114 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2115 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2116 	/*
2117 	 * We can do the cleanup while holding the interlock.
2118 	 * It is ok because:
2119 	 * 1. current_thread is the previous inheritor and it is running
2120 	 * 2. new inheritor is NULL.
2121 	 * => No chain of turnstiles needs to be updated.
2122 	 */
2123 	turnstile_cleanup();
2124 }
2125 
2126 __abortlike
2127 static void
2128 gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
2129 {
2130 	if (open) {
2131 		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2132 	} else {
2133 		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2134 	}
2135 }
2136 
2137 static void
2138 gate_open(gate_t *gate)
2139 {
2140 	uintptr_t state;
2141 	thread_t holder;
2142 	bool waiters;
2143 	thread_t thread = current_thread();
2144 
2145 	gate_verify(gate);
2146 	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2147 		return;
2148 	}
2149 
2150 	gate_ilock(gate);
2151 	state = ordered_load_gate(gate);
2152 	holder = GATE_STATE_TO_THREAD(state);
2153 	waiters = gate_has_waiter_bit(state);
2154 
2155 	if (holder != thread) {
2156 		gate_not_owned_panic(gate, holder, true);
2157 	}
2158 
2159 	if (waiters) {
2160 		gate_open_turnstile(gate);
2161 	}
2162 
2163 	state = GATE_ILOCK;
2164 	ordered_store_gate(gate, state);
2165 
2166 	gate_iunlock(gate);
2167 }
2168 
2169 static kern_return_t
2170 gate_handoff_turnstile(gate_t *gate,
2171     int flags,
2172     thread_t *thread_woken_up,
2173     bool *waiters)
2174 {
2175 	struct turnstile *ts = NULL;
2176 	kern_return_t ret = KERN_FAILURE;
2177 	thread_t hp_thread;
2178 
2179 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2180 	/*
2181 	 * Wake up the highest priority thread waiting on the gate
2182 	 */
2183 	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
2184 	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
2185 
2186 	if (hp_thread != NULL) {
2187 		/*
2188 		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
2189 		 */
2190 		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2191 		*thread_woken_up = hp_thread;
2192 		*waiters = turnstile_has_waiters(ts);
2193 		/*
2194 		 * Note: hp_thread is the new holder and the new inheritor.
2195 		 * In case there are no more waiters, it does not need to remain the inheritor,
2196 		 * and it should no longer be one by the time it finishes the wait, so that its
2197 		 * next open or handoff can go through the fast path.
2198 		 * We could set the inheritor to NULL here, or the new holder itself can set it
2199 		 * on its way back from the sleep. In the latter case there is a better chance
2200 		 * that new waiters will have arrived, avoiding the operation entirely.
2201 		 */
2202 		ret = KERN_SUCCESS;
2203 	} else {
2204 		/*
2205 		 * Waiters may have been woken up by an interrupt and still not
2206 		 * have updated gate->waiters, so we could not find them on the waitq.
2207 		 * Update the inheritor to NULL here, so that the current thread can return to userspace
2208 		 * independently of when the interrupted waiters finish their wait.
2209 		 */
2210 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2211 			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2212 			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2213 		}
2214 		// there are no waiters.
2215 		ret = KERN_NOT_WAITING;
2216 	}
2217 
2218 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2219 
2220 	/*
2221 	 * We can do the cleanup while holding the interlock.
2222 	 * It is ok because:
2223 	 * 1. current_thread is the previous inheritor and it is running
2224 	 * 2. new inheritor is NULL or it is a just-woken thread that will race to acquire the lock
2225 	 *    of the gate before trying to sleep.
2226 	 * => No chain of turnstiles needs to be updated.
2227 	 */
2228 	turnstile_cleanup();
2229 
2230 	return ret;
2231 }
2232 
2233 static kern_return_t
2234 gate_handoff(gate_t *gate,
2235     int flags)
2236 {
2237 	kern_return_t ret;
2238 	thread_t new_holder = NULL;
2239 	uintptr_t state;
2240 	thread_t holder;
2241 	bool waiters;
2242 	thread_t thread = current_thread();
2243 
2244 	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2245 	gate_verify(gate);
2246 
2247 	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2248 		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2249 			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
2250 			return KERN_NOT_WAITING;
2251 		}
2252 	}
2253 
2254 	gate_ilock(gate);
2255 	state = ordered_load_gate(gate);
2256 	holder = GATE_STATE_TO_THREAD(state);
2257 	waiters = gate_has_waiter_bit(state);
2258 
2259 	if (holder != current_thread()) {
2260 		gate_not_owned_panic(gate, holder, false);
2261 	}
2262 
2263 	if (waiters) {
2264 		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2265 		if (ret == KERN_SUCCESS) {
2266 			state = GATE_THREAD_TO_STATE(new_holder);
2267 			if (waiters) {
2268 				state |= GATE_WAITERS;
2269 			}
2270 		} else {
2271 			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2272 				state = 0;
2273 			}
2274 		}
2275 	} else {
2276 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2277 			state = 0;
2278 		}
2279 		ret = KERN_NOT_WAITING;
2280 	}
2281 	state |= GATE_ILOCK;
2282 	ordered_store_gate(gate, state);
2283 
2284 	gate_iunlock(gate);
2285 
2286 	if (new_holder) {
2287 		thread_deallocate(new_holder);
2288 	}
2289 	return ret;
2290 }
2291 
2292 static void_func_void
2293 gate_steal_turnstile(gate_t *gate,
2294     thread_t new_inheritor)
2295 {
2296 	struct turnstile *ts = NULL;
2297 
2298 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2299 
2300 	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2301 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2302 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2303 
2304 	/*
2305 	 * turnstile_cleanup might need to update the chain of the old holder.
2306 	 * This operation should happen without the turnstile interlock held.
2307 	 */
2308 	return turnstile_cleanup;
2309 }
2310 
2311 __abortlike
2312 static void
2313 gate_not_closed_panic(gate_t *gate, bool wait)
2314 {
2315 	if (wait) {
2316 		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
2317 	} else {
2318 		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
2319 	}
2320 }
2321 
2322 static void
2323 gate_steal(gate_t *gate)
2324 {
2325 	uintptr_t state;
2326 	thread_t holder;
2327 	thread_t thread = current_thread();
2328 	bool waiters;
2329 
2330 	void_func_void func_after_interlock_unlock;
2331 
2332 	gate_verify(gate);
2333 
2334 	gate_ilock(gate);
2335 	state = ordered_load_gate(gate);
2336 	holder = GATE_STATE_TO_THREAD(state);
2337 	waiters = gate_has_waiter_bit(state);
2338 
2339 	if (holder == NULL) {
2340 		gate_not_closed_panic(gate, false);
2341 	}
2342 
2343 	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
2344 	if (waiters) {
2345 		state |= GATE_WAITERS;
2346 		ordered_store_gate(gate, state);
2347 		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
2348 		gate_iunlock(gate);
2349 
2350 		func_after_interlock_unlock();
2351 	} else {
2352 		ordered_store_gate(gate, state);
2353 		gate_iunlock(gate);
2354 	}
2355 }
2356 
2357 static void_func_void
2358 gate_wait_turnstile(gate_t *gate,
2359     wait_interrupt_t interruptible,
2360     uint64_t deadline,
2361     thread_t holder,
2362     wait_result_t* wait,
2363     bool* waiters)
2364 {
2365 	struct turnstile *ts;
2366 	uintptr_t state;
2367 
2368 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2369 
2370 	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2371 	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);
2372 
2373 	gate_iunlock(gate);
2374 
2375 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2376 
2377 	*wait = thread_block(THREAD_CONTINUE_NULL);
2378 
2379 	gate_ilock(gate);
2380 
2381 	*waiters = turnstile_has_waiters(ts);
2382 
2383 	if (!*waiters) {
2384 		/*
2385 		 * We want to enable the fast path as soon as we see that there are no more waiters.
2386 		 * On the fast path the holder will not do any turnstile operations.
2387 		 * Set the inheritor as NULL here.
2388 		 *
2389 		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
2390 		 * already been set to NULL.
2391 		 */
2392 		state = ordered_load_gate(gate);
2393 		holder = GATE_STATE_TO_THREAD(state);
2394 		if (holder &&
2395 		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
2396 		    holder == current_thread())) {     // thread was woken up and it is the new holder
2397 			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2398 			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2399 		}
2400 	}
2401 
2402 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2403 
2404 	/*
2405 	 * turnstile_cleanup might need to update the chain of the old holder.
2406 	 * This operation should happen without the turnstile primitive interlock held.
2407 	 */
2408 	return turnstile_cleanup;
2409 }
2410 
2411 static void
2412 gate_free_internal(gate_t *gate)
2413 {
2414 	zfree(KT_GATE, gate);
2415 }
2416 
2417 __abortlike
2418 static void
2419 gate_too_many_refs_panic(gate_t *gate)
2420 {
2421 	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2422 }
2423 
2424 static gate_wait_result_t
2425 gate_wait(gate_t* gate,
2426     wait_interrupt_t interruptible,
2427     uint64_t deadline,
2428     void (^primitive_unlock)(void),
2429     void (^primitive_lock)(void))
2430 {
2431 	gate_wait_result_t ret;
2432 	void_func_void func_after_interlock_unlock;
2433 	wait_result_t wait_result;
2434 	uintptr_t state;
2435 	thread_t holder;
2436 	bool waiters;
2437 
2438 	gate_verify(gate);
2439 
2440 	gate_ilock(gate);
2441 	state = ordered_load_gate(gate);
2442 	holder = GATE_STATE_TO_THREAD(state);
2443 
2444 	if (holder == NULL) {
2445 		gate_not_closed_panic(gate, true);
2446 	}
2447 
2448 	/*
2449 	 * Get a ref on the gate so it will not
2450 	 * be freed while we are coming back from the sleep.
2451 	 */
2452 	if (gate->gt_refs == UINT16_MAX) {
2453 		gate_too_many_refs_panic(gate);
2454 	}
2455 	gate->gt_refs++;
2456 	state |= GATE_WAITERS;
2457 	ordered_store_gate(gate, state);
2458 
2459 	/*
2460 	 * Release the primitive lock before any
2461 	 * turnstile operation. Turnstile
2462 	 * does not support a blocking primitive as
2463 	 * interlock.
2464 	 *
2465 	 * In this way, concurrent threads will be
2466 	 * able to acquire the primitive lock
2467 	 * but will still wait for me through the
2468 	 * gate interlock.
2469 	 */
2470 	primitive_unlock();
2471 
2472 	func_after_interlock_unlock = gate_wait_turnstile(    gate,
2473 	    interruptible,
2474 	    deadline,
2475 	    holder,
2476 	    &wait_result,
2477 	    &waiters);
2478 
2479 	state = ordered_load_gate(gate);
2480 	holder = GATE_STATE_TO_THREAD(state);
2481 
2482 	switch (wait_result) {
2483 	case THREAD_INTERRUPTED:
2484 	case THREAD_TIMED_OUT:
2485 		assert(holder != current_thread());
2486 
2487 		if (waiters) {
2488 			state |= GATE_WAITERS;
2489 		} else {
2490 			state &= ~GATE_WAITERS;
2491 		}
2492 		ordered_store_gate(gate, state);
2493 
2494 		if (wait_result == THREAD_INTERRUPTED) {
2495 			ret = GATE_INTERRUPTED;
2496 		} else {
2497 			ret = GATE_TIMED_OUT;
2498 		}
2499 		break;
2500 	default:
2501 		/*
2502 		 * Note it is possible that even if the gate was handed off to
2503 		 * me, someone called gate_steal() before I woke up.
2504 		 *
2505 		 * It is also possible that the gate was opened, but someone
2506 		 * closed it while I was waking up.
2507 		 *
2508 		 * In both cases we return GATE_OPENED: since the gate was opened to me
2509 		 * at one point, it is the caller's responsibility to check again whether
2510 		 * the gate is open.
2511 		 */
2512 		if (holder == current_thread()) {
2513 			ret = GATE_HANDOFF;
2514 		} else {
2515 			ret = GATE_OPENED;
2516 		}
2517 		break;
2518 	}
2519 
2520 	assert(gate->gt_refs > 0);
2521 	uint32_t ref = --gate->gt_refs;
2522 	bool to_free = gate->gt_alloc;
2523 	gate_iunlock(gate);
2524 
2525 	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
2526 		if (to_free == true) {
2527 			assert(!waiters);
2528 			if (ref == 0) {
2529 				gate_free_internal(gate);
2530 			}
2531 			ret = GATE_OPENED;
2532 		} else {
2533 			gate_verify_destroy_panic(gate);
2534 		}
2535 	}
2536 
2537 	/*
2538 	 * turnstile func that needs to be executed without
2539 	 * holding the primitive interlock
2540 	 */
2541 	func_after_interlock_unlock();
2542 
2543 	primitive_lock();
2544 
2545 	return ret;
2546 }
2547 
2548 static void
2549 gate_assert(gate_t *gate, int flags)
2550 {
2551 	uintptr_t state;
2552 	thread_t holder;
2553 
2554 	gate_verify(gate);
2555 
2556 	gate_ilock(gate);
2557 	state = ordered_load_gate(gate);
2558 	holder = GATE_STATE_TO_THREAD(state);
2559 
2560 	switch (flags) {
2561 	case GATE_ASSERT_CLOSED:
2562 		assert(holder != NULL);
2563 		break;
2564 	case GATE_ASSERT_OPEN:
2565 		assert(holder == NULL);
2566 		break;
2567 	case GATE_ASSERT_HELD:
2568 		assert(holder == current_thread());
2569 		break;
2570 	default:
2571 		panic("invalid %s flag %d", __func__, flags);
2572 	}
2573 
2574 	gate_iunlock(gate);
2575 }
2576 
2577 enum {
2578 	GT_INIT_DEFAULT = 0,
2579 	GT_INIT_ALLOC
2580 };
2581 
2582 static void
2583 gate_init(gate_t *gate, uint type)
2584 {
2585 	bzero(gate, sizeof(gate_t));
2586 
2587 	gate->gt_data = 0;
2588 	gate->gt_turnstile = NULL;
2589 	gate->gt_refs = 1;
2590 	switch (type) {
2591 	case GT_INIT_ALLOC:
2592 		gate->gt_alloc = 1;
2593 		break;
2594 	default:
2595 		gate->gt_alloc = 0;
2596 		break;
2597 	}
2598 	gate->gt_type = GATE_TYPE;
2599 	gate->gt_flags_pad = 0;
2600 }
2601 
2602 static gate_t*
2603 gate_alloc_init(void)
2604 {
2605 	gate_t *gate;
2606 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2607 	gate_init(gate, GT_INIT_ALLOC);
2608 	return gate;
2609 }
2610 
2611 __abortlike
2612 static void
2613 gate_destroy_owned_panic(gate_t *gate, thread_t holder)
2614 {
2615 	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
2616 }
2617 
2618 __abortlike
2619 static void
2620 gate_destroy_waiter_panic(gate_t *gate)
2621 {
2622 	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
2623 }
2624 
2625 static uint16_t
2626 gate_destroy_internal(gate_t *gate)
2627 {
2628 	uintptr_t state;
2629 	thread_t holder;
2630 	uint16_t ref;
2631 
2632 	gate_ilock(gate);
2633 	state = ordered_load_gate(gate);
2634 	holder = GATE_STATE_TO_THREAD(state);
2635 
2636 	/*
2637 	 * The gate must be open
2638 	 * and all the threads must
2639 	 * have been woken up by this time
2640 	 */
2641 	if (holder != NULL) {
2642 		gate_destroy_owned_panic(gate, holder);
2643 	}
2644 	if (gate_has_waiter_bit(state)) {
2645 		gate_destroy_waiter_panic(gate);
2646 	}
2647 
2648 	assert(gate->gt_refs > 0);
2649 
2650 	ref = --gate->gt_refs;
2651 
2652 	/*
2653 	 * Mark the gate as destroyed.
2654 	 * The interlock bit still needs
2655 	 * to be available to let the
2656 	 * last woken-up threads clear
2657 	 * the wait.
2658 	 */
2659 	state = GATE_DESTROYED;
2660 	state |= GATE_ILOCK;
2661 	ordered_store_gate(gate, state);
2662 	gate_iunlock(gate);
2663 	return ref;
2664 }
2665 
2666 __abortlike
2667 static void
2668 gate_destroy_panic(gate_t *gate)
2669 {
2670 	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
2671 }
2672 
2673 static void
2674 gate_destroy(gate_t *gate)
2675 {
2676 	gate_verify(gate);
2677 	if (gate->gt_alloc == 1) {
2678 		gate_destroy_panic(gate);
2679 	}
2680 	gate_destroy_internal(gate);
2681 }
2682 
2683 __abortlike
2684 static void
2685 gate_free_panic(gate_t *gate)
2686 {
2687 	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
2688 }
2689 
2690 static void
2691 gate_free(gate_t *gate)
2692 {
2693 	uint16_t ref;
2694 
2695 	gate_verify(gate);
2696 
2697 	if (gate->gt_alloc == 0) {
2698 		gate_free_panic(gate);
2699 	}
2700 
2701 	ref = gate_destroy_internal(gate);
2702 	/*
2703 	 * Some of the threads waiting on the gate
2704 	 * might still need to run after being woken up.
2705 	 * They will access the gate to cleanup the
2706 	 * state, so we cannot free it.
2707 	 * The last waiter will free the gate in this case.
2708 	 */
2709 	if (ref == 0) {
2710 		gate_free_internal(gate);
2711 	}
2712 }
2713 
2714 /*
2715  * Name: lck_rw_gate_init
2716  *
2717  * Description: initializes a variable declared with decl_lck_rw_gate_data.
2718  *
2719  * Args:
2720  *   Arg1: lck_rw_t lock used to protect the gate.
2721  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2722  */
2723 void
2724 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2725 {
2726 	(void) lock;
2727 	gate_init(gate, GT_INIT_DEFAULT);
2728 }
2729 
2730 /*
2731  * Name: lck_rw_gate_alloc_init
2732  *
2733  * Description: allocates and initializes a gate_t.
2734  *
2735  * Args:
2736  *   Arg1: lck_rw_t lock used to protect the gate.
2737  *
2738  * Returns:
2739  *         gate_t allocated.
2740  */
2741 gate_t*
2742 lck_rw_gate_alloc_init(lck_rw_t *lock)
2743 {
2744 	(void) lock;
2745 	return gate_alloc_init();
2746 }
2747 
2748 /*
2749  * Name: lck_rw_gate_destroy
2750  *
2751  * Description: destroys a variable previously initialized
2752  *              with lck_rw_gate_init().
2753  *
2754  * Args:
2755  *   Arg1: lck_rw_t lock used to protect the gate.
2756  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2757  */
2758 void
2759 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2760 {
2761 	(void) lock;
2762 	gate_destroy(gate);
2763 }
2764 
2765 /*
2766  * Name: lck_rw_gate_free
2767  *
2768  * Description: destroys and tries to free a gate previously allocated
2769  *              with lck_rw_gate_alloc_init().
2770  *              The gate free might be delegated to the last thread returning
2771  *              from the gate_wait().
2772  *
2773  * Args:
2774  *   Arg1: lck_rw_t lock used to protect the gate.
2775  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2776  */
2777 void
2778 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2779 {
2780 	(void) lock;
2781 	gate_free(gate);
2782 }
2783 
2784 /*
2785  * Name: lck_rw_gate_try_close
2786  *
2787  * Description: Tries to close the gate.
2788  *              In case of success the current thread will be set as
2789  *              the holder of the gate.
2790  *
2791  * Args:
2792  *   Arg1: lck_rw_t lock used to protect the gate.
2793  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2794  *
2795  * Conditions: Lock must be held. Returns with the lock held.
2796  *
2797  * Returns:
2798  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2799  *          of the gate.
2800  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2801  *          to wake up possible waiters on the gate before returning to userspace.
2802  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2803  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2804  *
2805  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2806  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2807  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2808  *          be done without dropping the lock that is protecting the gate in between.
2809  */
2810 int
2811 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2812 {
2813 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2814 
2815 	return gate_try_close(gate);
2816 }
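
/*
 * Editorial sketch (not part of the xnu source above) of the
 * probe-then-wait pattern described in the comment block for
 * lck_rw_gate_try_close(): the lock is not dropped between the probe and
 * the wait. example_enter() is hypothetical; the lck_rw_* and
 * lck_rw_gate_* calls are the API defined in this file.
 */
#if 0 /* illustrative only */
static void
example_enter(lck_rw_t *lock, gate_t *gate)
{
	lck_rw_lock_exclusive(lock);
	while (lck_rw_gate_try_close(lock, gate) != KERN_SUCCESS) {
		gate_wait_result_t res;

		/* Drops and reacquires the lock while sleeping. */
		res = lck_rw_gate_wait(lock, gate, LCK_SLEEP_DEFAULT,
		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
		if (res == GATE_HANDOFF) {
			break;  /* ownership was handed to this thread */
		}
		/* GATE_OPENED: someone may have re-closed it; probe again. */
	}
	/* ... critical section; the gate is closed by this thread ... */
	lck_rw_gate_open(lock, gate);
	lck_rw_unlock_exclusive(lock);
}
#endif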
2817 
2818 /*
2819  * Name: lck_rw_gate_close
2820  *
2821  * Description: Closes the gate. The current thread will be set as
2822  *              the holder of the gate. Will panic if the gate is already closed.
2823  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2824  *              to wake up possible waiters on the gate before returning to userspace.
2825  *
2826  * Args:
2827  *   Arg1: lck_rw_t lock used to protect the gate.
2828  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2829  *
2830  * Conditions: Lock must be held. Returns with the lock held.
2831  *             The gate must be open.
2832  *
2833  */
2834 void
2835 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2836 {
2837 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2838 
2839 	return gate_close(gate);
2840 }
2841 
2842 /*
2843  * Name: lck_rw_gate_open
2844  *
2845  * Description: Opens the gate and wakes up possible waiters.
2846  *
2847  * Args:
2848  *   Arg1: lck_rw_t lock used to protect the gate.
2849  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2850  *
2851  * Conditions: Lock must be held. Returns with the lock held.
2852  *             The current thread must be the holder of the gate.
2853  *
2854  */
2855 void
2856 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2857 {
2858 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2859 
2860 	gate_open(gate);
2861 }
2862 
2863 /*
2864  * Name: lck_rw_gate_handoff
2865  *
2866  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2867  *              priority will be selected as the new holder of the gate, and woken up,
2868  *              with the gate remaining in the closed state throughout.
2869  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2870  *              will be returned.
2871  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2872  *              case no waiters were found.
2873  *
2874  *
2875  * Args:
2876  *   Arg1: lck_rw_t lock used to protect the gate.
2877  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2878  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2879  *
2880  * Conditions: Lock must be held. Returns with the lock held.
2881  *             The current thread must be the holder of the gate.
2882  *
2883  * Returns:
2884  *          KERN_SUCCESS in case one of the waiters became the new holder.
2885  *          KERN_NOT_WAITING in case there were no waiters.
2886  *
2887  */
2888 kern_return_t
2889 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2890 {
2891 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2892 
2893 	return gate_handoff(gate, flags);
2894 }
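
/*
 * Editorial sketch (not part of the xnu source above): releasing gate
 * ownership with handoff, opening the gate when nobody is waiting.
 * example_release() is hypothetical.
 */
#if 0 /* illustrative only */
static void
example_release(lck_rw_t *lock, gate_t *gate)
{
	lck_rw_lock_exclusive(lock);
	/*
	 * KERN_SUCCESS: a waiter became the new holder.
	 * KERN_NOT_WAITING: no waiters; the flag below leaves the gate open.
	 */
	(void)lck_rw_gate_handoff(lock, gate, GATE_HANDOFF_OPEN_IF_NO_WAITERS);
	lck_rw_unlock_exclusive(lock);
}
#endif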
2895 
2896 /*
2897  * Name: lck_rw_gate_steal
2898  *
2899  * Description: Steals the ownership of the gate. It sets the current thread as the
2900  *              new holder of the gate.
2901  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2902  *              to wake up possible waiters on the gate before returning to userspace.
2903  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2904  *              anymore.
2905  *
2906  *
2907  * Args:
2908  *   Arg1: lck_rw_t lock used to protect the gate.
2909  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2910  *
2911  * Conditions: Lock must be held. Returns with the lock held.
2912  *             The gate must be closed and the current thread must not already be the holder.
2913  *
2914  */
2915 void
2916 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2917 {
2918 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2919 
2920 	gate_steal(gate);
2921 }
2922 
2923 /*
2924  * Name: lck_rw_gate_wait
2925  *
2926  * Description: Waits for the current thread to become the holder of the gate or for the
2927  *              gate to become open. An interruptible mode and deadline can be specified
2928  *              to return earlier from the wait.
2929  *
2930  * Args:
2931  *   Arg1: lck_rw_t lock used to protect the gate.
2932  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2933  *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
2934  *   Arg4: interruptible flag for wait.
2935  *   Arg5: deadline for wait.
2936  *
2937  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2938  *             Lock will be dropped while waiting.
2939  *             The gate must be closed.
2940  *
2941  * Returns: Reason why the thread was woken up.
2942  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2943  *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2944  *                         to wake up possible waiters on the gate before returning to userspace.
2945  *          GATE_OPENED - the gate was opened by the holder.
2946  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
2947  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
2948  */
2949 gate_wait_result_t
2950 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2951 {
2952 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2953 
2954 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2955 
2956 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2957 		return gate_wait(gate,
2958 		           interruptible,
2959 		           deadline,
2960 		           ^{lck_rw_type = lck_rw_done(lock);},
2961 		           ^{;});
2962 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2963 		return gate_wait(gate,
2964 		           interruptible,
2965 		           deadline,
2966 		           ^{lck_rw_type = lck_rw_done(lock);},
2967 		           ^{lck_rw_lock(lock, lck_rw_type);});
2968 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2969 		return gate_wait(gate,
2970 		           interruptible,
2971 		           deadline,
2972 		           ^{lck_rw_type = lck_rw_done(lock);},
2973 		           ^{lck_rw_lock_exclusive(lock);});
2974 	} else {
2975 		return gate_wait(gate,
2976 		           interruptible,
2977 		           deadline,
2978 		           ^{lck_rw_type = lck_rw_done(lock);},
2979 		           ^{lck_rw_lock_shared(lock);});
2980 	}
2981 }
2982 
2983 /*
2984  * Name: lck_rw_gate_assert
2985  *
2986  * Description: asserts that the gate is in the specified state.
2987  *
2988  * Args:
2989  *   Arg1: lck_rw_t lock used to protect the gate.
2990  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2991  *   Arg3: flags to specified assert type.
2992  *         GATE_ASSERT_CLOSED - the gate is currently closed
2993  *         GATE_ASSERT_OPEN - the gate is currently opened
2994  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2995  */
2996 void
2997 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2998 {
2999 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3000 
3001 	gate_assert(gate, flags);
3002 	return;
3003 }
3004 
3005 /*
3006  * Name: lck_mtx_gate_init
3007  *
3008  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
3009  *
3010  * Args:
3011  *   Arg1: lck_mtx_t lock used to protect the gate.
3012  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3013  */
3014 void
3015 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
3016 {
3017 	(void) lock;
3018 	gate_init(gate, GT_INIT_DEFAULT);
3019 }
3020 
3021 /*
3022  * Name: lck_mtx_gate_alloc_init
3023  *
3024  * Description: allocates and initializes a gate_t.
3025  *
3026  * Args:
3027  *   Arg1: lck_mtx_t lock used to protect the gate.
3028  *
3029  * Returns:
3030  *         gate_t allocated.
3031  */
3032 gate_t*
3033 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3034 {
3035 	(void) lock;
3036 	return gate_alloc_init();
3037 }
3038 
3039 /*
3040  * Name: lck_mtx_gate_destroy
3041  *
3042  * Description: destroys a variable previously initialized
3043  *              with lck_mtx_gate_init().
3044  *
3045  * Args:
3046  *   Arg1: lck_mtx_t lock used to protect the gate.
3047  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3048  */
3049 void
3050 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3051 {
3052 	(void) lock;
3053 	gate_destroy(gate);
3054 }
3055 
3056 /*
3057  * Name: lck_mtx_gate_free
3058  *
3059  * Description: destroys and tries to free a gate previously allocated
3060  *              with lck_mtx_gate_alloc_init().
3061  *              The gate free might be delegated to the last thread returning
3062  *              from the gate_wait().
3063  *
3064  * Args:
3065  *   Arg1: lck_mtx_t lock used to protect the gate.
3066  *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3067  */
3068 void
3069 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3070 {
3071 	(void) lock;
3072 	gate_free(gate);
3073 }
3074 
3075 /*
3076  * Name: lck_mtx_gate_try_close
3077  *
3078  * Description: Tries to close the gate.
3079  *              In case of success the current thread will be set as
3080  *              the holder of the gate.
3081  *
3082  * Args:
3083  *   Arg1: lck_mtx_t lock used to protect the gate.
3084  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3085  *
3086  * Conditions: Lock must be held. Returns with the lock held.
3087  *
3088  * Returns:
3089  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3090  *          of the gate.
3091  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3092  *          to wake up possible waiters on the gate before returning to userspace.
3093  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3094  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3095  *
3096  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3097  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3098  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3099  *          be done without dropping the lock that is protecting the gate in between.
3100  */
3101 int
3102 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3103 {
3104 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3105 
3106 	return gate_try_close(gate);
3107 }
3108 
3109 /*
3110  * Name: lck_mtx_gate_close
3111  *
3112  * Description: Closes the gate. The current thread will be set as
3113  *              the holder of the gate. Will panic if the gate is already closed.
3114  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3115  *              to wake up possible waiters on the gate before returning to userspace.
3116  *
3117  * Args:
3118  *   Arg1: lck_mtx_t lock used to protect the gate.
3119  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3120  *
3121  * Conditions: Lock must be held. Returns with the lock held.
3122  *             The gate must be open.
3123  *
3124  */
3125 void
3126 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3127 {
3128 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3129 
3130 	return gate_close(gate);
3131 }
3132 
3133 /*
3134  * Name: lck_mtx_gate_open
3135  *
3136  * Description: Opens the gate and wakes up possible waiters.
3137  *
3138  * Args:
3139  *   Arg1: lck_mtx_t lock used to protect the gate.
3140  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3141  *
3142  * Conditions: Lock must be held. Returns with the lock held.
3143  *             The current thread must be the holder of the gate.
3144  *
3145  */
3146 void
3147 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3148 {
3149 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3150 
3151 	gate_open(gate);
3152 }

/*
 * Name: lck_mtx_gate_handoff
 *
 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
 *              priority will be selected as the new holder of the gate, and woken up,
 *              with the gate remaining in the closed state throughout.
 *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
 *              will be returned.
 *              The GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be passed to open the gate
 *              instead when no waiters are found.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The current thread must be the holder of the gate.
 *
 * Returns:
 *          KERN_SUCCESS in case one of the waiters became the new holder.
 *          KERN_NOT_WAITING in case there were no waiters.
 *
 */
kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	return gate_handoff(gate, flags);
}
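
/*
 * Example (illustrative sketch, not part of the original source): a
 * holder passing ownership on, with hypothetical `my_lock`/`my_gate`.
 * With GATE_HANDOFF_OPEN_IF_NO_WAITERS the gate never stays closed
 * without a holder: either a waiter takes over, or the gate is opened.
 *
 *	lck_mtx_lock(my_lock);
 *	if (lck_mtx_gate_handoff(my_lock, my_gate,
 *	    GATE_HANDOFF_OPEN_IF_NO_WAITERS) == KERN_NOT_WAITING) {
 *		// no waiters: the gate was opened instead
 *	}
 *	lck_mtx_unlock(my_lock);
 */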

/*
 * Name: lck_mtx_gate_steal
 *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 *              new holder of the gate.
 *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *              to wake up possible waiters on the gate before returning to userspace.
 *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
 *              anymore.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The gate must be closed and the current thread must not already be the holder.
 *
 */
void
lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	gate_steal(gate);
}
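
/*
 * Example (illustrative sketch, not part of the original source):
 * taking over a closed gate whose holder is known to have abandoned it,
 * with hypothetical `my_lock`/`my_gate`. After the steal, the duty to
 * open (or hand off) the gate moves to this thread.
 *
 *	lck_mtx_lock(my_lock);
 *	lck_mtx_gate_steal(my_lock, my_gate);
 *	// ... this thread is now the holder; the previous holder must
 *	// not open or hand off the gate anymore ...
 *	lck_mtx_gate_open(my_lock, my_gate);
 *	lck_mtx_unlock(my_lock);
 */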

/*
 * Name: lck_mtx_gate_wait
 *
 * Description: Waits for the current thread to become the holder of the gate or for the
 *              gate to become open. An interruptible mode and deadline can be specified
 *              to return earlier from the wait.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
 *
 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
 *             Lock will be dropped while waiting.
 *             The gate must be closed.
 *
 * Returns: Reason why the thread was woken up.
 *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *                         to wake up possible waiters on the gate before returning to userspace.
 *          GATE_OPENED - the gate was opened by the holder.
 *          GATE_TIMED_OUT - the thread was woken up by a timeout.
 *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
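
/*
 * Example (illustrative sketch, not part of the original source): using
 * LCK_SLEEP_UNLOCK so the mutex stays dropped after the wait, with an
 * abort-safe wait and a hypothetical `deadline` in mach absolute time.
 *
 *	gate_wait_result_t res;
 *
 *	lck_mtx_lock(my_lock);
 *	res = lck_mtx_gate_wait(my_lock, my_gate, LCK_SLEEP_UNLOCK,
 *	    THREAD_ABORTSAFE, deadline);
 *	// my_lock is no longer held here; GATE_TIMED_OUT and
 *	// GATE_INTERRUPTED still need to be handled by the caller
 */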

/*
 * Name: lck_mtx_gate_assert
 *
 * Description: Asserts that the gate is in the specified state.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: flags to specify the assert type.
 *         GATE_ASSERT_CLOSED - the gate is currently closed
 *         GATE_ASSERT_OPEN - the gate is currently open
 *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
 */
void
lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	gate_assert(gate, flags);
}
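
/*
 * Example (illustrative sketch, not part of the original source):
 * sanity-checking gate state before a transition, with hypothetical
 * `my_lock`/`my_gate`.
 *
 *	lck_mtx_lock(my_lock);
 *	lck_mtx_gate_assert(my_lock, my_gate, GATE_ASSERT_HELD);
 *	lck_mtx_gate_open(my_lock, my_gate);
 *	lck_mtx_unlock(my_lock);
 */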

#pragma mark - LCK_*_DECLARE support

__startup_func
void
lck_spin_startup_init(struct lck_spin_startup_spec *sp)
{
	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
}

__startup_func
void
lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
{
	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
}

__startup_func
void
lck_rw_startup_init(struct lck_rw_startup_spec *sp)
{
	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
}

__startup_func
void
usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
{
	simple_lock_init(sp->lck, sp->lck_init_arg);
}

__startup_func
void
lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
{
	lck_ticket_init(sp->lck, sp->lck_grp);
}
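
/*
 * Example (illustrative sketch, not part of the original source): the
 * LCK_*_DECLARE macros these hooks support emit a __startup_data spec
 * that is handed to the matching *_startup_init() above during boot, so
 * a statically declared lock is usable without an explicit init call.
 * `my_grp` and `my_mtx` are hypothetical names.
 *
 *	static LCK_GRP_DECLARE(my_grp, "my-subsystem");
 *	static LCK_MTX_DECLARE(my_mtx, &my_grp);
 */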