xref: /xnu-10002.1.13/osfmk/kern/locks.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #define LOCK_PRIVATE 1
58 
59 #include <mach_ldebug.h>
60 #include <debug.h>
61 
62 #include <mach/kern_return.h>
63 
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82 
83 #include <sys/kdebug.h>
84 
85 #define LCK_MTX_SLEEP_CODE              0
86 #define LCK_MTX_SLEEP_DEADLINE_CODE     1
87 #define LCK_MTX_LCK_WAIT_CODE           2
88 #define LCK_MTX_UNLCK_WAKEUP_CODE       3
89 
90 // Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
92 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
93 
94 #if MACH_LDEBUG
95 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
96 #else
97 #define ALIGN_TEST(p, t) do{}while(0)
98 #endif
99 
100 #define NOINLINE                __attribute__((noinline))
101 
102 #define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
103 #define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
104 
105 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
106 
107 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
108 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
109 
110 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
111 
112 struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);
113 
114 #if CONFIG_PV_TICKET
115 SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
116 #endif
117 
118 #if DEBUG
119 TUNABLE(uint32_t, LcksOpts, "lcks", LCK_OPTION_ENABLE_DEBUG);
120 #else
121 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
122 #endif
123 
124 #if CONFIG_DTRACE
125 #if defined (__x86_64__)
126 machine_timeout_t dtrace_spin_threshold = 500; // 500ns
127 #elif defined(__arm64__)
128 MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
129     0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
130 #endif
131 #endif
132 
133 struct lck_mcs PERCPU_DATA(lck_mcs);
134 
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 	if (__improbable(kdebug_enable)) {
140 		return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 	} else {
142 		return 0;
143 	}
144 }
145 
146 static __abortlike void
__lck_require_preemption_disabled_panic(void * lock)147 __lck_require_preemption_disabled_panic(void *lock)
148 {
149 	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
150 }
151 
152 static inline void
__lck_require_preemption_disabled(void * lock,thread_t self __unused)153 __lck_require_preemption_disabled(void *lock, thread_t self __unused)
154 {
155 	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
156 		__lck_require_preemption_disabled_panic(lock);
157 	}
158 }
159 
160 #pragma mark - HW Spin policies
161 
162 /*
163  * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
164  */
165 __attribute__((always_inline))
166 hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)167 hw_spin_compute_timeout(hw_spin_policy_t pol)
168 {
169 	hw_spin_timeout_t ret = {
170 		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
171 	};
172 
173 	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
174 #if SCHED_HYGIENE_DEBUG
175 	ret.hwst_in_ppl = pmap_in_ppl();
176 	/* Note we can't check if we are interruptible if in ppl */
177 	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
178 #endif /* SCHED_HYGIENE_DEBUG */
179 
180 #if SCHED_HYGIENE_DEBUG
181 #ifndef KASAN
182 	if (ret.hwst_timeout > 0 &&
183 	    !ret.hwst_in_ppl &&
184 	    !ret.hwst_interruptible &&
185 	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
186 		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
187 
188 #if defined(__x86_64__)
189 		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
190 #endif
191 		if (int_timeout < ret.hwst_timeout) {
192 			ret.hwst_timeout = int_timeout;
193 		}
194 	}
195 #endif /* !KASAN */
196 #endif /* SCHED_HYGIENE_DEBUG */
197 
198 	return ret;
199 }
200 
201 __attribute__((always_inline))
202 bool
hw_spin_in_ppl(hw_spin_timeout_t to)203 hw_spin_in_ppl(hw_spin_timeout_t to)
204 {
205 #if SCHED_HYGIENE_DEBUG
206 	return to.hwst_in_ppl;
207 #else
208 	(void)to;
209 	return pmap_in_ppl();
210 #endif
211 }
212 
213 bool
hw_spin_should_keep_spinning(void * lock,hw_spin_policy_t pol,hw_spin_timeout_t to,hw_spin_state_t * state)214 hw_spin_should_keep_spinning(
215 	void                   *lock,
216 	hw_spin_policy_t        pol,
217 	hw_spin_timeout_t       to,
218 	hw_spin_state_t        *state)
219 {
220 	hw_spin_timeout_status_t rc;
221 #if SCHED_HYGIENE_DEBUG
222 	uint64_t irq_time = 0;
223 #endif
224 	uint64_t now;
225 
226 	if (__improbable(to.hwst_timeout == 0)) {
227 		return true;
228 	}
229 
230 	now = ml_get_timebase();
231 	if (__probable(now < state->hwss_deadline)) {
232 		/* keep spinning */
233 		return true;
234 	}
235 
236 #if SCHED_HYGIENE_DEBUG
237 	if (to.hwst_interruptible) {
238 		irq_time = current_thread()->machine.int_time_mt;
239 	}
240 #endif /* SCHED_HYGIENE_DEBUG */
241 
242 	if (__probable(state->hwss_deadline == 0)) {
243 		state->hwss_start     = now;
244 		state->hwss_deadline  = now + to.hwst_timeout;
245 #if SCHED_HYGIENE_DEBUG
246 		state->hwss_irq_start = irq_time;
247 #endif
248 		return true;
249 	}
250 
251 	/*
252 	 * Update fields that the callback needs
253 	 */
254 	state->hwss_now     = now;
255 #if SCHED_HYGIENE_DEBUG
256 	state->hwss_irq_end = irq_time;
257 #endif /* SCHED_HYGIENE_DEBUG */
258 
259 	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
260 	    to, *state);
261 	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
262 		/* push the deadline */
263 		state->hwss_deadline += to.hwst_timeout;
264 	}
265 	return rc == HW_LOCK_TIMEOUT_CONTINUE;
266 }
267 
268 __attribute__((always_inline))
269 void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)270 lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
271 {
272 #if DEBUG || DEVELOPMENT
273 	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
274 #else
275 	(void)owner;
276 #endif
277 }
278 
279 __attribute__((always_inline))
280 void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)281 lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
282 {
283 #if DEBUG || DEVELOPMENT
284 	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
285 	    (uintptr_t)ctid_get_thread_unsafe(ctid);
286 #else
287 	(void)ctid;
288 #endif
289 }
290 
291 lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void * lck,uintptr_t owner)292 lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
293 {
294 	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);
295 
296 	if (owner < (1u << CTID_SIZE_BIT)) {
297 		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
298 	} else {
299 		/* strip possible bits used by the lock implementations */
300 		owner &= ~0x7ul;
301 	}
302 
303 	lsti->lock = lck;
304 	lsti->owner_thread_cur = owner;
305 	lsti->owner_cpu = ~0u;
306 	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
307 
308 	if (owner == 0) {
309 		/* if the owner isn't known, just bail */
310 		goto out;
311 	}
312 
313 	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
314 		cpu_data_t *data = cpu_datap(i);
315 		if (data && (uintptr_t)data->cpu_active_thread == owner) {
316 			lsti->owner_cpu = i;
317 			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
318 #if __x86_64__
319 			if ((uint32_t)cpu_number() != i) {
320 				/* Cause NMI and panic on the owner's cpu */
321 				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
322 			}
323 #endif
324 			break;
325 		}
326 	}
327 
328 out:
329 	return lsti;
330 }
331 
332 #pragma mark - HW locks
333 
334 /*
335  * Routine:	hw_lock_init
336  *
337  *	Initialize a hardware lock.
338  */
339 MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)340 hw_lock_init(hw_lock_t lock)
341 {
342 	ordered_store_hw(lock, 0);
343 }
344 
345 __result_use_check
346 static inline bool
hw_lock_trylock_contended(hw_lock_t lock,uintptr_t newval)347 hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
348 {
349 #if OS_ATOMIC_USE_LLSC
350 	uintptr_t oldval;
351 	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
352 		if (oldval != 0) {
353 		        wait_for_event(); // clears the monitor so we don't need give_up()
354 		        return false;
355 		}
356 	});
357 	return true;
358 #else // !OS_ATOMIC_USE_LLSC
359 #if OS_ATOMIC_HAS_LLSC
360 	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
361 	if (oldval != 0) {
362 		wait_for_event(); // clears the monitor so we don't need give_up()
363 		return false;
364 	}
365 #endif
366 	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
367 #endif // !OS_ATOMIC_USE_LLSC
368 }
369 
370 __result_use_check
371 static inline bool
hw_lock_trylock_bit(uint32_t * target,unsigned int bit,bool wait)372 hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
373 {
374 	uint32_t mask = 1u << bit;
375 
376 #if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
377 	uint32_t oldval, newval;
378 	os_atomic_rmw_loop(target, oldval, newval, acquire, {
379 		newval = oldval | mask;
380 		if (__improbable(oldval & mask)) {
381 #if OS_ATOMIC_HAS_LLSC
382 		        if (wait) {
383 		                wait_for_event(); // clears the monitor so we don't need give_up()
384 			} else {
385 		                os_atomic_clear_exclusive();
386 			}
387 #else
388 		        if (wait) {
389 		                cpu_pause();
390 			}
391 #endif
392 		        return false;
393 		}
394 	});
395 	return true;
396 #else
397 	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
398 	if (__improbable(oldval & mask)) {
399 		if (wait) {
400 			wait_for_event(); // clears the monitor so we don't need give_up()
401 		} else {
402 			os_atomic_clear_exclusive();
403 		}
404 		return false;
405 	}
406 	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
407 #endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
408 }
409 
410 static hw_spin_timeout_status_t
hw_spin_timeout_panic(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)411 hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
412 {
413 	hw_lock_t lock  = _lock;
414 	uintptr_t owner = lock->lock_data & ~0x7ul;
415 	lck_spinlock_to_info_t lsti;
416 
417 	if (!spinlock_timeout_panic) {
418 		/* keep spinning rather than panicing */
419 		return HW_LOCK_TIMEOUT_CONTINUE;
420 	}
421 
422 	if (pmap_in_ppl()) {
423 		/*
424 		 * This code is used by the PPL and can't write to globals.
425 		 */
426 		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
427 		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
428 		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
429 		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
430 	}
431 
432 	// Capture the actual time spent blocked, which may be higher than the timeout
433 	// if a misbehaving interrupt stole this thread's CPU time.
434 	lsti = lck_spinlock_timeout_hit(lock, owner);
435 	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
436 	    "current owner: %p (on cpu %d), "
437 #if DEBUG || DEVELOPMENT
438 	    "initial owner: %p, "
439 #endif /* DEBUG || DEVELOPMENT */
440 	    HW_SPIN_TIMEOUT_DETAILS_FMT,
441 	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
442 	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
443 #if DEBUG || DEVELOPMENT
444 	    (void *)lsti->owner_thread_orig,
445 #endif /* DEBUG || DEVELOPMENT */
446 	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
447 }
448 
449 const struct hw_spin_policy hw_lock_spin_policy = {
450 	.hwsp_name              = "hw_lock_t",
451 	.hwsp_timeout_atomic    = &lock_panic_timeout,
452 	.hwsp_op_timeout        = hw_spin_timeout_panic,
453 };
454 
455 static hw_spin_timeout_status_t
hw_spin_always_return(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)456 hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
457 {
458 #pragma unused(_lock, to, st)
459 	return HW_LOCK_TIMEOUT_RETURN;
460 }
461 
462 const struct hw_spin_policy hw_lock_spin_panic_policy = {
463 	.hwsp_name              = "hw_lock_t[panic]",
464 #if defined(__x86_64__)
465 	.hwsp_timeout           = &LockTimeOutTSC,
466 #else
467 	.hwsp_timeout_atomic    = &LockTimeOut,
468 #endif
469 	.hwsp_timeout_shift     = 2,
470 	.hwsp_op_timeout        = hw_spin_always_return,
471 };
472 
473 #if DEBUG || DEVELOPMENT
474 static machine_timeout_t hw_lock_test_to;
475 const struct hw_spin_policy hw_lock_test_give_up_policy = {
476 	.hwsp_name              = "testing policy",
477 #if defined(__x86_64__)
478 	.hwsp_timeout           = &LockTimeOutTSC,
479 #else
480 	.hwsp_timeout_atomic    = &LockTimeOut,
481 #endif
482 	.hwsp_timeout_shift     = 2,
483 	.hwsp_op_timeout        = hw_spin_always_return,
484 };
485 
486 __startup_func
487 static void
hw_lock_test_to_init(void)488 hw_lock_test_to_init(void)
489 {
490 	uint64_t timeout;
491 
492 	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
493 #if defined(__x86_64__)
494 	timeout = tmrCvt(timeout, tscFCvtn2t);
495 #endif
496 	os_atomic_init(&hw_lock_test_to, timeout);
497 }
498 STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
499 #endif
500 
501 static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)502 hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
503 {
504 	hw_lock_bit_t *lock = _lock;
505 
506 	if (!spinlock_timeout_panic) {
507 		/* keep spinning rather than panicing */
508 		return HW_LOCK_TIMEOUT_CONTINUE;
509 	}
510 
511 	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
512 	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
513 	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
514 	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
515 }
516 
517 static const struct hw_spin_policy hw_lock_bit_policy = {
518 	.hwsp_name              = "hw_lock_bit_t",
519 	.hwsp_timeout_atomic    = &lock_panic_timeout,
520 	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
521 };
522 
523 #if __arm64__
524 const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
525 const struct hw_spin_policy hw_lock_bit_policy_2s = {
526 	.hwsp_name              = "hw_lock_bit_t",
527 	.hwsp_timeout           = &hw_lock_bit_timeout_2s,
528 	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
529 };
530 #endif
531 
532 /*
533  *	Routine: hw_lock_lock_contended
534  *
535  *	Spin until lock is acquired or timeout expires.
536  *	timeout is in mach_absolute_time ticks. Called with
537  *	preemption disabled.
538  */
539 static hw_lock_status_t NOINLINE
hw_lock_lock_contended(hw_lock_t lock,uintptr_t data,hw_spin_policy_t pol LCK_GRP_ARG (lck_grp_t * grp))540 hw_lock_lock_contended(
541 	hw_lock_t               lock,
542 	uintptr_t               data,
543 	hw_spin_policy_t        pol
544 	LCK_GRP_ARG(lck_grp_t *grp))
545 {
546 	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
547 	hw_spin_state_t   state = { };
548 	hw_lock_status_t  rc = HW_LOCK_CONTENDED;
549 
550 	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
551 	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
552 		panic("hwlock: thread %p is trying to lock %p recursively",
553 		    HW_LOCK_STATE_TO_THREAD(data), lock);
554 	}
555 
556 #if CONFIG_DTRACE || LOCK_STATS
557 	uint64_t begin = 0;
558 	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
559 
560 	if (__improbable(stat_enabled)) {
561 		begin = mach_absolute_time();
562 	}
563 #endif /* CONFIG_DTRACE || LOCK_STATS */
564 
565 	if (!hw_spin_in_ppl(to)) {
566 		/*
567 		 * This code is used by the PPL and can't write to globals.
568 		 */
569 		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
570 	}
571 
572 	do {
573 		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
574 			cpu_pause();
575 			if (hw_lock_trylock_contended(lock, data)) {
576 				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
577 				rc = HW_LOCK_ACQUIRED;
578 				goto end;
579 			}
580 		}
581 	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));
582 
583 end:
584 #if CONFIG_DTRACE || LOCK_STATS
585 	if (__improbable(stat_enabled)) {
586 		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
587 		    mach_absolute_time() - begin);
588 	}
589 	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
590 #endif /* CONFIG_DTRACE || LOCK_STATS */
591 	return rc;
592 }
593 
594 static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)595 hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
596 {
597 	uint32_t *address = _lock;
598 
599 	if (!spinlock_timeout_panic) {
600 		/* keep spinning rather than panicing */
601 		return HW_LOCK_TIMEOUT_CONTINUE;
602 	}
603 
604 	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
605 	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
606 	    address, HW_SPIN_TIMEOUT_ARG(to, st),
607 	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
608 }
609 
610 static const struct hw_spin_policy hw_wait_while_equals32_policy = {
611 	.hwsp_name              = "hw_wait_while_equals32",
612 	.hwsp_timeout_atomic    = &lock_panic_timeout,
613 	.hwsp_op_timeout        = hw_wait_while_equals32_panic,
614 };
615 
616 static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)617 hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
618 {
619 	uint64_t *address = _lock;
620 
621 	if (!spinlock_timeout_panic) {
622 		/* keep spinning rather than panicing */
623 		return HW_LOCK_TIMEOUT_CONTINUE;
624 	}
625 
626 	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
627 	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
628 	    address, HW_SPIN_TIMEOUT_ARG(to, st),
629 	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
630 }
631 
632 static const struct hw_spin_policy hw_wait_while_equals64_policy = {
633 	.hwsp_name              = "hw_wait_while_equals64",
634 	.hwsp_timeout_atomic    = &lock_panic_timeout,
635 	.hwsp_op_timeout        = hw_wait_while_equals64_panic,
636 };
637 
638 uint32_t
hw_wait_while_equals32(uint32_t * address,uint32_t current)639 hw_wait_while_equals32(uint32_t *address, uint32_t current)
640 {
641 	hw_spin_policy_t  pol   = &hw_wait_while_equals32_policy;
642 	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
643 	hw_spin_state_t   state = { };
644 	uint32_t          v;
645 
646 	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
647 		hw_spin_should_keep_spinning(address, pol, to, &state);
648 	}
649 
650 	return v;
651 }
652 
653 uint64_t
hw_wait_while_equals64(uint64_t * address,uint64_t current)654 hw_wait_while_equals64(uint64_t *address, uint64_t current)
655 {
656 	hw_spin_policy_t  pol   = &hw_wait_while_equals64_policy;
657 	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
658 	hw_spin_state_t   state = { };
659 	uint64_t          v;
660 
661 	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
662 		hw_spin_should_keep_spinning(address, pol, to, &state);
663 	}
664 
665 	return v;
666 }
667 
668 __result_use_check
669 static inline hw_lock_status_t
hw_lock_to_internal(hw_lock_t lock,thread_t thread,hw_spin_policy_t pol LCK_GRP_ARG (lck_grp_t * grp))670 hw_lock_to_internal(
671 	hw_lock_t               lock,
672 	thread_t                thread,
673 	hw_spin_policy_t        pol
674 	LCK_GRP_ARG(lck_grp_t *grp))
675 {
676 	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);
677 
678 	if (__probable(hw_lock_trylock_contended(lock, state))) {
679 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
680 		return HW_LOCK_ACQUIRED;
681 	}
682 
683 	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
684 }
685 
686 /*
687  *	Routine: hw_lock_lock
688  *
689  *	Acquire lock, spinning until it becomes available,
690  *	return with preemption disabled.
691  */
692 void
693 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
694 {
695 	thread_t thread = current_thread();
696 	lock_disable_preemption_for_thread(thread);
697 	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
698 	    LCK_GRP_ARG(grp));
699 }
700 
701 /*
702  *	Routine: hw_lock_lock_nopreempt
703  *
704  *	Acquire lock, spinning until it becomes available.
705  */
706 void
707 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
708 {
709 	thread_t thread = current_thread();
710 	__lck_require_preemption_disabled(lock, thread);
711 	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
712 	    LCK_GRP_ARG(grp));
713 }
714 
715 /*
716  *	Routine: hw_lock_to
717  *
718  *	Acquire lock, spinning until it becomes available or timeout.
719  *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
720  *	preemption disabled.
721  */
722 unsigned
723 int
724 (hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
725 {
726 	thread_t thread = current_thread();
727 	lock_disable_preemption_for_thread(thread);
728 	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
729 }
730 
731 /*
732  *	Routine: hw_lock_to_nopreempt
733  *
734  *	Acquire lock, spinning until it becomes available or timeout.
735  *	Timeout is in mach_absolute_time ticks, called and return with
736  *	preemption disabled.
737  */
738 unsigned
739 int
740 (hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
741 {
742 	thread_t thread = current_thread();
743 	__lck_require_preemption_disabled(lock, thread);
744 	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
745 }
746 
747 __result_use_check
748 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))749 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
750 {
751 	if (__probable(lock_cmpxchg(&lock->lock_data, 0,
752 	    HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
753 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
754 		return true;
755 	}
756 	return false;
757 }
758 
759 /*
760  *	Routine: hw_lock_try
761  *
762  *	returns with preemption disabled on success.
763  */
764 unsigned
765 int
766 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
767 {
768 	thread_t thread = current_thread();
769 	lock_disable_preemption_for_thread(thread);
770 	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
771 	if (!success) {
772 		lock_enable_preemption();
773 	}
774 	return success;
775 }
776 
777 unsigned
778 int
779 (hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
780 {
781 	thread_t thread = current_thread();
782 	__lck_require_preemption_disabled(lock, thread);
783 	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
784 }
785 
786 #if DEBUG || DEVELOPMENT
787 __abortlike
788 static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)789 __hw_lock_unlock_unowned_panic(hw_lock_t lock)
790 {
791 	panic("hwlock: thread %p is trying to lock %p recursively",
792 	    current_thread(), lock);
793 }
794 #endif /* DEBUG || DEVELOPMENT */
795 
796 /*
797  *	Routine: hw_lock_unlock
798  *
799  *	Unconditionally release lock, release preemption level.
800  */
801 static inline void
hw_lock_unlock_internal(hw_lock_t lock)802 hw_lock_unlock_internal(hw_lock_t lock)
803 {
804 #if DEBUG || DEVELOPMENT
805 	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
806 	    LOCK_CORRECTNESS_PANIC()) {
807 		__hw_lock_unlock_unowned_panic(lock);
808 	}
809 #endif /* DEBUG || DEVELOPMENT */
810 
811 	os_atomic_store(&lock->lock_data, 0, release);
812 #if     CONFIG_DTRACE
813 	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
814 #endif /* CONFIG_DTRACE */
815 }
816 
817 void
818 (hw_lock_unlock)(hw_lock_t lock)
819 {
820 	hw_lock_unlock_internal(lock);
821 	lock_enable_preemption();
822 }
823 
824 void
825 (hw_lock_unlock_nopreempt)(hw_lock_t lock)
826 {
827 	hw_lock_unlock_internal(lock);
828 }
829 
830 void
hw_lock_assert(__assert_only hw_lock_t lock,__assert_only unsigned int type)831 hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
832 {
833 #if MACH_ASSERT
834 	thread_t thread, holder;
835 
836 	holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
837 	thread = current_thread();
838 
839 	if (type == LCK_ASSERT_OWNED) {
840 		if (holder == 0) {
841 			panic("Lock not owned %p = %p", lock, holder);
842 		}
843 		if (holder != thread) {
844 			panic("Lock not owned by current thread %p = %p", lock, holder);
845 		}
846 	} else if (type == LCK_ASSERT_NOTOWNED) {
847 		if (holder != THREAD_NULL && holder == thread) {
848 			panic("Lock owned by current thread %p = %p", lock, holder);
849 		}
850 	} else {
851 		panic("hw_lock_assert(): invalid arg (%u)", type);
852 	}
853 #endif /* MACH_ASSERT */
854 }
855 
856 /*
857  *	Routine hw_lock_held, doesn't change preemption state.
858  *	N.B.  Racy, of course.
859  */
860 unsigned int
hw_lock_held(hw_lock_t lock)861 hw_lock_held(hw_lock_t lock)
862 {
863 	return ordered_load_hw(lock) != 0;
864 }
865 
866 static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(hw_lock_bit_t * lock,uint32_t bit,hw_spin_policy_t pol LCK_GRP_ARG (lck_grp_t * grp))867 hw_lock_bit_to_contended(
868 	hw_lock_bit_t          *lock,
869 	uint32_t                bit,
870 	hw_spin_policy_t        pol
871 	LCK_GRP_ARG(lck_grp_t *grp))
872 {
873 	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
874 	hw_spin_state_t   state = { };
875 	hw_lock_status_t  rc = HW_LOCK_CONTENDED;
876 
877 #if CONFIG_DTRACE || LOCK_STATS
878 	uint64_t begin = 0;
879 	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
880 
881 	if (__improbable(stat_enabled)) {
882 		begin = mach_absolute_time();
883 	}
884 #endif /* LOCK_STATS || CONFIG_DTRACE */
885 
886 	do {
887 		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
888 			rc = hw_lock_trylock_bit(lock, bit, true);
889 
890 			if (rc == HW_LOCK_ACQUIRED) {
891 				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
892 				goto end;
893 			}
894 		}
895 
896 		assert(rc == HW_LOCK_CONTENDED);
897 	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));
898 
899 end:
900 #if CONFIG_DTRACE || LOCK_STATS
901 	if (__improbable(stat_enabled)) {
902 		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
903 		    mach_absolute_time() - begin);
904 	}
905 	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
906 #endif /* CONFIG_DTRACE || LCK_GRP_STAT */
907 	return rc;
908 }
909 
910 __result_use_check
911 static inline unsigned int
hw_lock_bit_to_internal(hw_lock_bit_t * lock,unsigned int bit,hw_spin_policy_t pol LCK_GRP_ARG (lck_grp_t * grp))912 hw_lock_bit_to_internal(
913 	hw_lock_bit_t          *lock,
914 	unsigned int            bit,
915 	hw_spin_policy_t        pol
916 	LCK_GRP_ARG(lck_grp_t *grp))
917 {
918 	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
919 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
920 		return HW_LOCK_ACQUIRED;
921 	}
922 
923 	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
924 }
925 
926 /*
927  *	Routine: hw_lock_bit_to
928  *
929  *	Acquire bit lock, spinning until it becomes available or timeout.
930  *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
931  *	preemption disabled.
932  */
933 unsigned
934 int
935 (hw_lock_bit_to)(
936 	hw_lock_bit_t          * lock,
937 	uint32_t                bit,
938 	hw_spin_policy_t        pol
939 	LCK_GRP_ARG(lck_grp_t *grp))
940 {
941 	_disable_preemption();
942 	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
943 }
944 
945 /*
946  *	Routine: hw_lock_bit
947  *
948  *	Acquire bit lock, spinning until it becomes available,
949  *	return with preemption disabled.
950  */
951 void
952 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
953 {
954 	_disable_preemption();
955 	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
956 }
957 
958 /*
959  *	Routine: hw_lock_bit_nopreempt
960  *
961  *	Acquire bit lock, spinning until it becomes available.
962  */
963 void
964 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
965 {
966 	__lck_require_preemption_disabled(lock, current_thread());
967 	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
968 }
969 
970 
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 	boolean_t success = false;
976 
977 	_disable_preemption();
978 	success = hw_lock_trylock_bit(lock, bit, false);
979 	if (!success) {
980 		lock_enable_preemption();
981 	}
982 
983 	if (success) {
984 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 	}
986 
987 	return success;
988 }
989 
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	/*
	 * Clear the lock bit with release ordering so that stores made inside
	 * the critical section are visible before the lock appears free.
	 */
	os_atomic_andnot(lock, 1u << bit, release);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
998 
999 /*
1000  *	Routine:	hw_unlock_bit
1001  *
1002  *		Release spin-lock. The second parameter is the bit number to test and set.
1003  *		Decrement the preemption level.
1004  */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	/* Release the bit, then undo the preemption-disable taken at lock time. */
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}
1011 
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	/* Variant that leaves the preemption level untouched; caller keeps it disabled. */
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
1018 
1019 
1020 #pragma mark - lck_*_sleep
1021 
1022 /*
1023  * Routine:	lck_spin_sleep
1024  */
1025 wait_result_t
lck_spin_sleep_grp(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,lck_grp_t * grp)1026 lck_spin_sleep_grp(
1027 	lck_spin_t              *lck,
1028 	lck_sleep_action_t      lck_sleep_action,
1029 	event_t                 event,
1030 	wait_interrupt_t        interruptible,
1031 	lck_grp_t               *grp)
1032 {
1033 	wait_result_t   res;
1034 
1035 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1036 		panic("Invalid lock sleep action %x", lck_sleep_action);
1037 	}
1038 
1039 	res = assert_wait(event, interruptible);
1040 	if (res == THREAD_WAITING) {
1041 		lck_spin_unlock(lck);
1042 		res = thread_block(THREAD_CONTINUE_NULL);
1043 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1044 			lck_spin_lock_grp(lck, grp);
1045 		}
1046 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1047 		lck_spin_unlock(lck);
1048 	}
1049 
1050 	return res;
1051 }
1052 
wait_result_t
lck_spin_sleep(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	/* Same as lck_spin_sleep_grp() with no lock group attributed. */
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
1062 
1063 /*
1064  * Routine:	lck_spin_sleep_deadline
1065  */
1066 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1067 lck_spin_sleep_deadline(
1068 	lck_spin_t              *lck,
1069 	lck_sleep_action_t      lck_sleep_action,
1070 	event_t                 event,
1071 	wait_interrupt_t        interruptible,
1072 	uint64_t                deadline)
1073 {
1074 	wait_result_t   res;
1075 
1076 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1077 		panic("Invalid lock sleep action %x", lck_sleep_action);
1078 	}
1079 
1080 	res = assert_wait_deadline(event, interruptible, deadline);
1081 	if (res == THREAD_WAITING) {
1082 		lck_spin_unlock(lck);
1083 		res = thread_block(THREAD_CONTINUE_NULL);
1084 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1085 			lck_spin_lock(lck);
1086 		}
1087 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1088 		lck_spin_unlock(lck);
1089 	}
1090 
1091 	return res;
1092 }
1093 
1094 /*
1095  * Routine:	lck_mtx_sleep
1096  */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	thread_pri_floor_t      token;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-acquire the mutex in the mode the caller asked for. */
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* token was initialized above iff this flag was set. */
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1149 
1150 
1151 /*
1152  * Routine:	lck_mtx_sleep_deadline
1153  */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 	lck_mtx_t               *lck,
1157 	lck_sleep_action_t      lck_sleep_action,
1158 	event_t                 event,
1159 	wait_interrupt_t        interruptible,
1160 	uint64_t                deadline)
1161 {
1162 	wait_result_t           res;
1163 	thread_pri_floor_t      token;
1164 
1165 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167 
1168 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 		panic("Invalid lock sleep action %x", lck_sleep_action);
1170 	}
1171 
1172 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 		/*
1174 		 * See lck_mtx_sleep().
1175 		 */
1176 		token = thread_priority_floor_start();
1177 	}
1178 
1179 	res = assert_wait_deadline(event, interruptible, deadline);
1180 	if (res == THREAD_WAITING) {
1181 		lck_mtx_unlock(lck);
1182 		res = thread_block(THREAD_CONTINUE_NULL);
1183 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 				lck_mtx_lock_spin(lck);
1186 			} else {
1187 				lck_mtx_lock(lck);
1188 			}
1189 		}
1190 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 		lck_mtx_unlock(lck);
1192 	}
1193 
1194 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 		thread_priority_floor_end(&token);
1196 	}
1197 
1198 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199 
1200 	return res;
1201 }
1202 
1203 /*
1204  * sleep_with_inheritor and wakeup_with_inheritor KPI
1205  *
1206  * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207  * the latest thread specified as inheritor.
1208  *
 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore it is the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1212  *
1213  * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214  *
 * Turnstile requires a non blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non blocking primitive is required before
 * invoking any turnstile operation.
1219  *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without interrupts disabled.
1223  * Also:
1224  * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225  * - every event will have a dedicated wait_queue.
1226  *
1227  * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228  * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229  */
1230 
/*
 * Wake one or all threads waiting on event and update the turnstile push
 * accordingly.
 *
 * wake_one:        wake a single waiter; otherwise wake all waiters.
 * action:          LCK_WAKE_DEFAULT transfers the push to the woken thread
 *                  (via WAITQ_UPDATE_INHERITOR); LCK_WAKE_DO_NOT_TRANSFER_PUSH
 *                  leaves the push untransferred (wake_one only).
 * thread_wokenup:  if non-NULL, set to the woken thread, or NULL when no
 *                  thread was waiting.
 *
 * Returns KERN_SUCCESS if a thread was woken, KERN_NOT_WAITING otherwise.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t                 event,
	wait_result_t           result,
	bool                    wake_one,
	lck_wake_action_t       action,
	thread_t               *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (wake_one) {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		if (thread_wokenup) {
			thread_t wokeup;

			wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
			*thread_wokenup = wokeup;
			ret = wokeup ? KERN_SUCCESS : KERN_NOT_WAITING;
		} else {
			ret = waitq_wakeup64_one(&ts->ts_waitq,
			    CAST_EVENT64_T(event), result, flags);
		}
		if (ret == KERN_SUCCESS && action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
			goto complete;
		}
		if (ret == KERN_NOT_WAITING) {
			/* Nobody was woken: make sure no stale inheritor remains. */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE);
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In that case the new inheritor is either NULL, or a thread that was just woken
	 * up and has not blocked yet because it is racing with the same interlock used
	 * here after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor, the old
	 * inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We could optimize this by asking the turnstile to tell us whether the
	 * old inheritor needs an update, and drop the lock only in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1319 
/*
 * Sleep on event while pushing the waiting thread's priority onto inheritor.
 *
 * primitive_unlock is invoked once the turnstile hash-bucket interlock is
 * held (the handoff from the caller's primitive to the bucket spinlock), and
 * primitive_lock is invoked after the wait, just before returning, to restore
 * the caller's locking state.  Returns the wait result of thread_block().
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline,
	void                  (^primitive_lock)(void),
	void                  (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Re-take the bucket interlock to tear down our use of the turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* Restore the caller's locking state before returning. */
	primitive_lock();

	return ret;
}
1376 
1377 /*
1378  * change_sleep_inheritor is independent from the locking primitive.
1379  */
1380 
1381 /*
1382  * Name: change_sleep_inheritor
1383  *
1384  * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1385  *
1386  * Args:
1387  *   Arg1: event to redirect the push.
1388  *   Arg2: new inheritor for event.
1389  *
1390  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1391  *
1392  * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1393  *             wakeup for the event is called.
1394  *             NOTE: this cannot be called from interrupt context.
1395  */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret =  KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (!turnstile_has_waiters(ts)) {
		/* Still redirect the push below, but report that nobody waits. */
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1437 
1438 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1439 lck_spin_sleep_with_inheritor(
1440 	lck_spin_t *lock,
1441 	lck_sleep_action_t lck_sleep_action,
1442 	event_t event,
1443 	thread_t inheritor,
1444 	wait_interrupt_t interruptible,
1445 	uint64_t deadline)
1446 {
1447 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1448 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1449 		           interruptible, deadline,
1450 		           ^{}, ^{ lck_spin_unlock(lock); });
1451 	} else {
1452 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1453 		           interruptible, deadline,
1454 		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1455 	}
1456 }
1457 
1458 wait_result_t
hw_lck_ticket_sleep_with_inheritor(hw_lck_ticket_t * lock,lck_grp_t * grp __unused,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1459 hw_lck_ticket_sleep_with_inheritor(
1460 	hw_lck_ticket_t *lock,
1461 	lck_grp_t *grp __unused,
1462 	lck_sleep_action_t lck_sleep_action,
1463 	event_t event,
1464 	thread_t inheritor,
1465 	wait_interrupt_t interruptible,
1466 	uint64_t deadline)
1467 {
1468 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1469 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1470 		           interruptible, deadline,
1471 		           ^{}, ^{ hw_lck_ticket_unlock(lock); });
1472 	} else {
1473 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1474 		           interruptible, deadline,
1475 		           ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
1476 	}
1477 }
1478 
1479 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1480 lck_ticket_sleep_with_inheritor(
1481 	lck_ticket_t *lock,
1482 	lck_grp_t *grp,
1483 	lck_sleep_action_t lck_sleep_action,
1484 	event_t event,
1485 	thread_t inheritor,
1486 	wait_interrupt_t interruptible,
1487 	uint64_t deadline)
1488 {
1489 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1490 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1491 		           interruptible, deadline,
1492 		           ^{}, ^{ lck_ticket_unlock(lock); });
1493 	} else {
1494 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1495 		           interruptible, deadline,
1496 		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1497 	}
1498 }
1499 
1500 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1501 lck_mtx_sleep_with_inheritor(
1502 	lck_mtx_t              *lock,
1503 	lck_sleep_action_t      lck_sleep_action,
1504 	event_t                 event,
1505 	thread_t                inheritor,
1506 	wait_interrupt_t        interruptible,
1507 	uint64_t                deadline)
1508 {
1509 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1510 
1511 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1512 		return sleep_with_inheritor_and_turnstile(event,
1513 		           inheritor,
1514 		           interruptible,
1515 		           deadline,
1516 		           ^{;},
1517 		           ^{lck_mtx_unlock(lock);});
1518 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1519 		return sleep_with_inheritor_and_turnstile(event,
1520 		           inheritor,
1521 		           interruptible,
1522 		           deadline,
1523 		           ^{lck_mtx_lock_spin(lock);},
1524 		           ^{lck_mtx_unlock(lock);});
1525 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1526 		return sleep_with_inheritor_and_turnstile(event,
1527 		           inheritor,
1528 		           interruptible,
1529 		           deadline,
1530 		           ^{lck_mtx_lock_spin_always(lock);},
1531 		           ^{lck_mtx_unlock(lock);});
1532 	} else {
1533 		return sleep_with_inheritor_and_turnstile(event,
1534 		           inheritor,
1535 		           interruptible,
1536 		           deadline,
1537 		           ^{lck_mtx_lock(lock);},
1538 		           ^{lck_mtx_unlock(lock);});
1539 	}
1540 }
1541 
1542 /*
1543  * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1544  */
1545 
1546 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1547 lck_rw_sleep_with_inheritor(
1548 	lck_rw_t               *lock,
1549 	lck_sleep_action_t      lck_sleep_action,
1550 	event_t                 event,
1551 	thread_t                inheritor,
1552 	wait_interrupt_t        interruptible,
1553 	uint64_t                deadline)
1554 {
1555 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1556 
1557 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1558 
1559 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1560 		return sleep_with_inheritor_and_turnstile(event,
1561 		           inheritor,
1562 		           interruptible,
1563 		           deadline,
1564 		           ^{;},
1565 		           ^{lck_rw_type = lck_rw_done(lock);});
1566 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1567 		return sleep_with_inheritor_and_turnstile(event,
1568 		           inheritor,
1569 		           interruptible,
1570 		           deadline,
1571 		           ^{lck_rw_lock(lock, lck_rw_type);},
1572 		           ^{lck_rw_type = lck_rw_done(lock);});
1573 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1574 		return sleep_with_inheritor_and_turnstile(event,
1575 		           inheritor,
1576 		           interruptible,
1577 		           deadline,
1578 		           ^{lck_rw_lock_exclusive(lock);},
1579 		           ^{lck_rw_type = lck_rw_done(lock);});
1580 	} else {
1581 		return sleep_with_inheritor_and_turnstile(event,
1582 		           inheritor,
1583 		           interruptible,
1584 		           deadline,
1585 		           ^{lck_rw_lock_shared(lock);},
1586 		           ^{lck_rw_type = lck_rw_done(lock);});
1587 	}
1588 }
1589 
1590 /*
1591  * wakeup_with_inheritor functions are independent from the locking primitive.
1592  */
1593 
1594 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1595 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1596 {
1597 	return wakeup_with_inheritor_and_turnstile(event,
1598 	           result,
1599 	           TRUE,
1600 	           action,
1601 	           thread_wokenup);
1602 }
1603 
1604 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1605 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1606 {
1607 	return wakeup_with_inheritor_and_turnstile(event,
1608 	           result,
1609 	           FALSE,
1610 	           0,
1611 	           NULL);
1612 }
1613 
1614 void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq,__unused event64_t event,thread_waitinfo_t * waitinfo)1615 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
1616 {
1617 	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
1618 	assert(waitq_type(waitq) == WQT_TURNSTILE);
1619 	waitinfo->owner = 0;
1620 	waitinfo->context = 0;
1621 
1622 	if (waitq_held(waitq)) {
1623 		return;
1624 	}
1625 
1626 	struct turnstile *turnstile = waitq_to_turnstile(waitq);
1627 	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
1628 	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
1629 }
1630 
1631 static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
1632 static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
1633 static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1634 
/*
 * Common implementation for cond_sleep_with_inheritor{32,64}[_mask].
 *
 * Under the turnstile hash-bucket interlock, cond_sleep_check re-evaluates
 * the condition variable; when it still matches it returns true and reports
 * the ctid of the thread to push on.  Only then is the current thread queued
 * on the turnstile waitq with that thread as inheritor.  Returns
 * THREAD_NOT_WAITING when the condition check fails, otherwise the wait
 * result of thread_block().
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Re-take the bucket interlock to tear down our use of the turnstile. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1701 
1702 /*
1703  * Name: cond_sleep_with_inheritor32_mask
1704  *
1705  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1706  *              Allows a thread to conditionally sleep while indicating which thread should
1707  *              inherit the priority push associated with the condition.
1708  *              The condition should be expressed through a cond_swi_var32_s pointer.
1709  *              The condition needs to be populated by the caller with the ctid of the
1710  *              thread that should inherit the push. The remaining bits of the condition
1711  *              can be used by the caller to implement its own synchronization logic.
1712  *              A copy of the condition value observed by the caller when it decided to call
1713  *              this function should be provided to prevent races with matching wakeups.
1714  *              This function will atomically check the value stored in the condition against
1715  *              the expected/observed one provided only for the bits that are set in the mask.
1716  *              If the check doesn't pass the thread will not sleep and the function will return.
1717  *              The ctid provided in the condition will be used only after a successful
1718  *              check.
1719  *
1720  * Args:
1721  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1722  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1723  *   Arg3: mask to apply to the condition to check.
1724  *   Arg4: interruptible flag for wait.
1725  *   Arg5: deadline for wait.
1726  *
1727  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1728  *             wakeup for the cond is called.
1729  *
1730  * Returns: result of the wait.
1731  */
1732 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1733 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1734 {
1735 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1736 		cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1737 		bool ret;
1738 		if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1739 			ret = true;
1740 			*ctid = cond_val.cond32_owner;
1741 		} else {
1742 			ret = false;
1743 		}
1744 		return ret;
1745 	};
1746 
1747 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1748 }
1749 
1750 /*
1751  * Name: cond_sleep_with_inheritor64_mask
1752  *
1753  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1754  *              Allows a thread to conditionally sleep while indicating which thread should
1755  *              inherit the priority push associated with the condition.
1756  *              The condition should be expressed through a cond_swi_var64_s pointer.
1757  *              The condition needs to be populated by the caller with the ctid of the
1758  *              thread that should inherit the push. The remaining bits of the condition
1759  *              can be used by the caller to implement its own synchronization logic.
1760  *              A copy of the condition value observed by the caller when it decided to call
1761  *              this function should be provided to prevent races with matching wakeups.
1762  *              This function will atomically check the value stored in the condition against
1763  *              the expected/observed one provided only for the bits that are set in the mask.
1764  *              If the check doesn't pass the thread will not sleep and the function will return.
1765  *              The ctid provided in the condition will be used only after a successful
1766  *              check.
1767  *
1768  * Args:
1769  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1770  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1771  *   Arg3: mask to apply to the condition to check.
1772  *   Arg4: interruptible flag for wait.
1773  *   Arg5: deadline for wait.
1774  *
1775  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1776  *             wakeup for the cond is called.
1777  *
1778  * Returns: result of the wait.
1779  */
1780 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1781 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1782 {
1783 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1784 		cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1785 		bool ret;
1786 		if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1787 			ret = true;
1788 			*ctid = cond_val.cond64_owner;
1789 		} else {
1790 			ret = false;
1791 		}
1792 		return ret;
1793 	};
1794 
1795 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1796 }
1797 
1798 /*
1799  * Name: cond_sleep_with_inheritor32
1800  *
1801  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1802  *              Allows a thread to conditionally sleep while indicating which thread should
1803  *              inherit the priority push associated with the condition.
1804  *              The condition should be expressed through a cond_swi_var32_s pointer.
1805  *              The condition needs to be populated by the caller with the ctid of the
1806  *              thread that should inherit the push. The remaining bits of the condition
1807  *              can be used by the caller to implement its own synchronization logic.
1808  *              A copy of the condition value observed by the caller when it decided to call
1809  *              this function should be provided to prevent races with matching wakeups.
1810  *              This function will atomically check the value stored in the condition against
1811  *              the expected/observed one provided. If the check doesn't pass the thread will not
1812  *              sleep and the function will return.
1813  *              The ctid provided in the condition will be used only after a successful
1814  *              check.
1815  *
1816  * Args:
1817  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1818  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1819  *   Arg3: interruptible flag for wait.
1820  *   Arg4: deadline for wait.
1821  *
1822  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1823  *             wakeup for the cond is called.
1824  *
1825  * Returns: result of the wait.
1826  */
1827 wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond,cond_swi_var32_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1828 cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1829 {
1830 	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
1831 }
1832 
1833 /*
1834  * Name: cond_sleep_with_inheritor64
1835  *
1836  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1837  *              Allows a thread to conditionally sleep while indicating which thread should
1838  *              inherit the priority push associated with the condition.
1839  *              The condition should be expressed through a cond_swi_var64_s pointer.
1840  *              The condition needs to be populated by the caller with the ctid of the
1841  *              thread that should inherit the push. The remaining bits of the condition
1842  *              can be used by the caller to implement its own synchronization logic.
1843  *              A copy of the condition value observed by the caller when it decided to call
1844  *              this function should be provided to prevent races with matching wakeups.
1845  *              This function will atomically check the value stored in the condition against
1846  *              the expected/observed one provided. If the check doesn't pass the thread will not
1847  *              sleep and the function will return.
1848  *              The ctid provided in the condition will be used only after a successful
1849  *              check.
1850  *
1851  * Args:
1852  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1853  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1854  *   Arg3: interruptible flag for wait.
1855  *   Arg4: deadline for wait.
1856  *
1857  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1858  *             wakeup for the cond is called.
1859  *
1860  * Returns: result of the wait.
1861  */
1862 wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond,cond_swi_var64_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1863 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1864 {
1865 	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1866 }
1867 
1868 /*
1869  * Name: cond_wakeup_one_with_inheritor
1870  *
1871  * Description: Wake up one waiter waiting on the condition (if any).
1872  *              The thread woken up will be the one with the higher sched priority waiting on the condition.
1873  *              The push for the condition will be transferred from the last inheritor to the woken up thread.
1874  *
 * Args:
 *   Arg1: condition to wake from.
 *   Arg2: wait result to pass to the woken up thread.
 *   Arg3: wake action flags (lck_wake_action_t).
 *   Arg4: pointer for storing the thread woken up.
1879  *
1880  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1881  *
1882  * Conditions: The new inheritor wokenup cannot return to user space or exit until another inheritor is specified for the
1883  *             condition or a wakeup for the event is called.
1884  *             A reference for the wokenup thread is acquired.
1885  *             NOTE: this cannot be called from interrupt context.
1886  */
1887 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1888 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1889 {
1890 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1891 	           result,
1892 	           TRUE,
1893 	           action,
1894 	           thread_wokenup);
1895 }
1896 
1897 /*
1898  * Name: cond_wakeup_all_with_inheritor
1899  *
1900  * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1901  *
1902  * Args:
1903  *   Arg1: condition to wake from.
1904  *   Arg2: wait result to pass to the woken up threads.
1905  *
1906  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1907  *
1908  * Conditions: NOTE: this cannot be called from interrupt context.
1909  */
1910 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1911 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1912 {
1913 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1914 	           result,
1915 	           FALSE,
1916 	           0,
1917 	           NULL);
1918 }
1919 
1920 
1921 #pragma mark - gates
1922 
#define GATE_TYPE        3
#define GATE_ILOCK_BIT   0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* The gate interlock is a spin lock on the low bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
/*
 * Argument fully parenthesized so compound expressions (e.g. `a | b`)
 * cannot be re-associated by `&`'s higher precedence.
 */
#define gate_has_waiter_bit(state) (((state) & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* gt_data layout: holder thread pointer ORed with the two low flag bits. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Poison value stored in gt_data once the gate has been destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate)     ((event_t)(gate))
#define EVENT_TO_GATE(event) ((gate_t *)(event))

typedef void (*void_func_void)(void);
1947 
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	/* gt_type != GATE_TYPE: the gate was never gate_init()ed or is corrupted. */
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1954 
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	/* The gate's state word carries the GATE_DESTROYED poison: use after destroy. */
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1961 
1962 static void
gate_verify(gate_t * gate)1963 gate_verify(gate_t *gate)
1964 {
1965 	if (gate->gt_type != GATE_TYPE) {
1966 		gate_verify_tag_panic(gate);
1967 	}
1968 	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
1969 		gate_verify_destroy_panic(gate);
1970 	}
1971 
1972 	assert(gate->gt_refs > 0);
1973 }
1974 
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	/* Closing (or try-closing) a gate that already has a holder. */
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
1981 
1982 static kern_return_t
gate_try_close(gate_t * gate)1983 gate_try_close(gate_t *gate)
1984 {
1985 	uintptr_t state;
1986 	thread_t holder;
1987 	kern_return_t ret;
1988 	thread_t thread = current_thread();
1989 
1990 	gate_verify(gate);
1991 
1992 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
1993 		return KERN_SUCCESS;
1994 	}
1995 
1996 	gate_ilock(gate);
1997 	state = ordered_load_gate(gate);
1998 	holder = GATE_STATE_TO_THREAD(state);
1999 
2000 	if (holder == NULL) {
2001 		assert(gate_has_waiter_bit(state) == FALSE);
2002 
2003 		state = GATE_THREAD_TO_STATE(current_thread());
2004 		state |= GATE_ILOCK;
2005 		ordered_store_gate(gate, state);
2006 		ret = KERN_SUCCESS;
2007 	} else {
2008 		if (holder == current_thread()) {
2009 			gate_already_owned_panic(gate, holder);
2010 		}
2011 		ret = KERN_FAILURE;
2012 	}
2013 
2014 	gate_iunlock(gate);
2015 	return ret;
2016 }
2017 
/*
 * Close the gate: the current thread becomes the holder.
 * Panics (gate_already_owned_panic) if the gate is already closed.
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: gate fully open (state 0) -> install ourselves as holder. */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	/* Slow path: take the interlock and re-check under it. */
	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* An open gate cannot have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Record ourselves as holder; interlock bit released by gate_iunlock(). */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2047 
/*
 * Slow path of gate_open(), called with the gate interlock held and
 * only when the waiters bit is set: wake all waiters and drop the
 * priority push the opening thread was receiving as inheritor.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/* Wake every waiter; WAITQ_UPDATE_INHERITOR clears the inheritor. */
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2068 
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	/* A thread other than the holder tried to open (open==true) or
	 * hand off (open==false) the gate. */
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2079 
/*
 * Open the gate: clears the holder (must be the current thread, else
 * panic) and wakes all waiters via gate_open_turnstile() if any.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: we are the holder and no flag bits are set -> state 0. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	/* Only the holder may open the gate. */
	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiters bits; interlock released by gate_iunlock(). */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2111 
/*
 * Slow path of gate_handoff(), called with the gate interlock held:
 * wake the highest priority waiter and report it through
 * *thread_woken_up (referenced) and *waiters. Returns KERN_SUCCESS on
 * a successful handoff, KERN_NOT_WAITING if no waiter was on the waitq.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding doing the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * Waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just woken-up thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2175 
2176 static kern_return_t
gate_handoff(gate_t * gate,int flags)2177 gate_handoff(gate_t *gate,
2178     int flags)
2179 {
2180 	kern_return_t ret;
2181 	thread_t new_holder = NULL;
2182 	uintptr_t state;
2183 	thread_t holder;
2184 	bool waiters;
2185 	thread_t thread = current_thread();
2186 
2187 	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2188 	gate_verify(gate);
2189 
2190 	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2191 		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2192 			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
2193 			return KERN_NOT_WAITING;
2194 		}
2195 	}
2196 
2197 	gate_ilock(gate);
2198 	state = ordered_load_gate(gate);
2199 	holder = GATE_STATE_TO_THREAD(state);
2200 	waiters = gate_has_waiter_bit(state);
2201 
2202 	if (holder != current_thread()) {
2203 		gate_not_owned_panic(gate, holder, false);
2204 	}
2205 
2206 	if (waiters) {
2207 		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2208 		if (ret == KERN_SUCCESS) {
2209 			state = GATE_THREAD_TO_STATE(new_holder);
2210 			if (waiters) {
2211 				state |= GATE_WAITERS;
2212 			}
2213 		} else {
2214 			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2215 				state = 0;
2216 			}
2217 		}
2218 	} else {
2219 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2220 			state = 0;
2221 		}
2222 		ret = KERN_NOT_WAITING;
2223 	}
2224 	state |= GATE_ILOCK;
2225 	ordered_store_gate(gate, state);
2226 
2227 	gate_iunlock(gate);
2228 
2229 	if (new_holder) {
2230 		thread_deallocate(new_holder);
2231 	}
2232 	return ret;
2233 }
2234 
/*
 * Slow path of gate_steal(), called with the gate interlock held:
 * make new_inheritor the turnstile inheritor so existing waiters push
 * on it. Returns turnstile_cleanup, to be run by the caller after the
 * interlock has been dropped.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2253 
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	/* Waiting on (wait==true) or stealing (wait==false) a gate with no holder. */
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2264 
/*
 * Forcibly take ownership of a closed gate: the current thread becomes
 * the holder regardless of who held it. Panics if the gate is open.
 * If there are waiters, the turnstile inheritor is retargeted at the
 * stealing thread so the push follows it.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	/* Stealing only makes sense on a closed gate. */
	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		/* Retarget the push at us; run the returned cleanup unlocked. */
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2299 
/*
 * Block on the gate's turnstile, pushing on `holder`. Called with the
 * gate interlock held; the interlock is dropped across the block and
 * reacquired before returning. *wait receives the thread_block()
 * result, *waiters whether the turnstile still has waiters. Returns
 * turnstile_cleanup, to be run after the interlock is dropped.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
		    holder == current_thread())) {     // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2353 
static void
gate_free_internal(gate_t *gate)
{
	/* Return a gate allocated by gate_alloc_init() to the KT_GATE zone. */
	zfree(KT_GATE, gate);
}
2359 
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	/* gt_refs (16 bits) would overflow if another waiter took a ref. */
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2366 
/*
 * Wait for a closed gate to be opened or handed off.
 *
 * The gate must be closed (else gate_not_closed_panic). The caller's
 * primitive lock is released via primitive_unlock() before blocking
 * and reacquired via primitive_lock() before returning.
 *
 * Returns GATE_HANDOFF if the gate was handed off to this thread,
 * GATE_OPENED if it was opened (or the gate was destroyed/freed while
 * we slept), GATE_INTERRUPTED/GATE_TIMED_OUT on interrupted or expired
 * waits.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	/* Blocks; returns with the gate interlock held again. */
	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Resync the waiters bit with what the turnstile reported. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken before sleeping; capture values before unlocking. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate may have been destroyed (gate_free()) while we slept. */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			/* Last waiter out frees the heap-allocated gate. */
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2490 
2491 static void
gate_assert(gate_t * gate,int flags)2492 gate_assert(gate_t *gate, int flags)
2493 {
2494 	uintptr_t state;
2495 	thread_t holder;
2496 
2497 	gate_verify(gate);
2498 
2499 	gate_ilock(gate);
2500 	state = ordered_load_gate(gate);
2501 	holder = GATE_STATE_TO_THREAD(state);
2502 
2503 	switch (flags) {
2504 	case GATE_ASSERT_CLOSED:
2505 		assert(holder != NULL);
2506 		break;
2507 	case GATE_ASSERT_OPEN:
2508 		assert(holder == NULL);
2509 		break;
2510 	case GATE_ASSERT_HELD:
2511 		assert(holder == current_thread());
2512 		break;
2513 	default:
2514 		panic("invalid %s flag %d", __func__, flags);
2515 	}
2516 
2517 	gate_iunlock(gate);
2518 }
2519 
/* Values for gate_init()'s `type` argument. */
enum {
	GT_INIT_DEFAULT = 0,    /* gt_alloc = 0: embedded gate */
	GT_INIT_ALLOC           /* gt_alloc = 1: gate from gate_alloc_init() */
};
2524 
2525 static void
gate_init(gate_t * gate,uint type)2526 gate_init(gate_t *gate, uint type)
2527 {
2528 	bzero(gate, sizeof(gate_t));
2529 
2530 	gate->gt_data = 0;
2531 	gate->gt_turnstile = NULL;
2532 	gate->gt_refs = 1;
2533 	switch (type) {
2534 	case GT_INIT_ALLOC:
2535 		gate->gt_alloc = 1;
2536 		break;
2537 	default:
2538 		gate->gt_alloc = 0;
2539 		break;
2540 	}
2541 	gate->gt_type = GATE_TYPE;
2542 	gate->gt_flags_pad = 0;
2543 }
2544 
2545 static gate_t*
gate_alloc_init(void)2546 gate_alloc_init(void)
2547 {
2548 	gate_t *gate;
2549 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2550 	gate_init(gate, GT_INIT_ALLOC);
2551 	return gate;
2552 }
2553 
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	/* Destroying a gate that is still closed (has a holder). */
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2560 
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	/* Destroying a gate whose waiters bit is still set. */
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2567 
/*
 * Poison the gate state and drop the init-time reference.
 * Returns the number of references still outstanding (held by threads
 * returning from gate_wait()); the caller frees the gate only at 0.
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still needs
	 * to be available to let the
	 * last woken-up threads clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2608 
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	/* gate_destroy() called on a heap-allocated gate (gt_alloc == 1). */
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2615 
2616 static void
gate_destroy(gate_t * gate)2617 gate_destroy(gate_t *gate)
2618 {
2619 	gate_verify(gate);
2620 	if (gate->gt_alloc == 1) {
2621 		gate_destroy_panic(gate);
2622 	}
2623 	gate_destroy_internal(gate);
2624 }
2625 
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	/* gate_free() called on an embedded gate (gt_alloc == 0). */
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2632 
2633 static void
gate_free(gate_t * gate)2634 gate_free(gate_t *gate)
2635 {
2636 	uint16_t ref;
2637 
2638 	gate_verify(gate);
2639 
2640 	if (gate->gt_alloc == 0) {
2641 		gate_free_panic(gate);
2642 	}
2643 
2644 	ref = gate_destroy_internal(gate);
2645 	/*
2646 	 * Some of the threads waiting on the gate
2647 	 * might still need to run after being woken up.
2648 	 * They will access the gate to cleanup the
2649 	 * state, so we cannot free it.
2650 	 * The last waiter will free the gate in this case.
2651 	 */
2652 	if (ref == 0) {
2653 		gate_free_internal(gate);
2654 	}
2655 }
2656 
2657 /*
2658  * Name: lck_rw_gate_init
2659  *
2660  * Description: initializes a variable declared with decl_lck_rw_gate_data.
2661  *
2662  * Args:
2663  *   Arg1: lck_rw_t lock used to protect the gate.
2664  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2665  */
void
lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
{
	/* The lock is not needed for initialization; it documents which
	 * rw lock protects this gate in the other lck_rw_gate_* calls. */
	(void) lock;
	gate_init(gate, GT_INIT_DEFAULT);
}
2672 
2673 /*
2674  * Name: lck_rw_gate_alloc_init
2675  *
2676  * Description: allocates and initializes a gate_t.
2677  *
2678  * Args:
2679  *   Arg1: lck_rw_t lock used to protect the gate.
2680  *
2681  * Returns:
2682  *         gate_t allocated.
2683  */
gate_t*
lck_rw_gate_alloc_init(lck_rw_t *lock)
{
	/* The lock is not needed for allocation; see lck_rw_gate_init(). */
	(void) lock;
	return gate_alloc_init();
}
2690 
2691 /*
2692  * Name: lck_rw_gate_destroy
2693  *
2694  * Description: destroys a variable previously initialized
2695  *              with lck_rw_gate_init().
2696  *
2697  * Args:
2698  *   Arg1: lck_rw_t lock used to protect the gate.
2699  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2700  */
void
lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
{
	/* The lock is not needed for destruction; see lck_rw_gate_init(). */
	(void) lock;
	gate_destroy(gate);
}
2707 
2708 /*
2709  * Name: lck_rw_gate_free
2710  *
2711  * Description: destroys and tries to free a gate previously allocated
2712  *              with lck_rw_gate_alloc_init().
2713  *              The gate free might be delegated to the last thread returning
2714  *              from the gate_wait().
2715  *
2716  * Args:
2717  *   Arg1: lck_rw_t lock used to protect the gate.
2718  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2719  */
void
lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
{
	/* The lock is not needed here; the free may be delegated to the
	 * last waiter returning from gate_wait() (see gate_free()). */
	(void) lock;
	gate_free(gate);
}
2726 
2727 /*
2728  * Name: lck_rw_gate_try_close
2729  *
2730  * Description: Tries to close the gate.
2731  *              In case of success the current thread will be set as
2732  *              the holder of the gate.
2733  *
2734  * Args:
2735  *   Arg1: lck_rw_t lock used to protect the gate.
2736  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2737  *
2738  * Conditions: Lock must be held. Returns with the lock held.
2739  *
2740  * Returns:
2741  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2742  *          of the gate.
2743  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2744  *          to wake up possible waiters on the gate before returning to userspace.
2745  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2746  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2747  *
2748  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2749  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2750  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2751  *          be done without dropping the lock that is protecting the gate in between.
2752  */
2753 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2754 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2755 {
2756 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2757 
2758 	return gate_try_close(gate);
2759 }
2760 
2761 /*
2762  * Name: lck_rw_gate_close
2763  *
2764  * Description: Closes the gate. The current thread will be set as
2765  *              the holder of the gate. Will panic if the gate is already closed.
2766  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2767  *              to wake up possible waiters on the gate before returning to userspace.
2768  *
2769  * Args:
2770  *   Arg1: lck_rw_t lock used to protect the gate.
2771  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2772  *
2773  * Conditions: Lock must be held. Returns with the lock held.
2774  *             The gate must be open.
2775  *
2776  */
2777 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2778 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2779 {
2780 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2781 
2782 	return gate_close(gate);
2783 }
2784 
2785 /*
2786  * Name: lck_rw_gate_open
2787  *
2788  * Description: Opens the gate and wakes up possible waiters.
2789  *
2790  * Args:
2791  *   Arg1: lck_rw_t lock used to protect the gate.
2792  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2793  *
2794  * Conditions: Lock must be held. Returns with the lock held.
2795  *             The current thread must be the holder of the gate.
2796  *
2797  */
2798 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2799 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2800 {
2801 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2802 
2803 	gate_open(gate);
2804 }
2805 
2806 /*
2807  * Name: lck_rw_gate_handoff
2808  *
2809  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2810  *              priority will be selected as the new holder of the gate, and woken up,
2811  *              with the gate remaining in the closed state throughout.
2812  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2813  *              will be returned.
2814  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2815  *              case no waiters were found.
2816  *
2817  *
2818  * Args:
2819  *   Arg1: lck_rw_t lock used to protect the gate.
2820  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2821  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2822  *
2823  * Conditions: Lock must be held. Returns with the lock held.
2824  *             The current thread must be the holder of the gate.
2825  *
2826  * Returns:
2827  *          KERN_SUCCESS in case one of the waiters became the new holder.
2828  *          KERN_NOT_WAITING in case there were no waiters.
2829  *
2830  */
2831 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2832 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2833 {
2834 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2835 
2836 	return gate_handoff(gate, flags);
2837 }
2838 
2839 /*
2840  * Name: lck_rw_gate_steal
2841  *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 *              new holder of the gate.
2844  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2845  *              to wake up possible waiters on the gate before returning to userspace.
2846  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2847  *              anymore.
2848  *
2849  *
2850  * Args:
2851  *   Arg1: lck_rw_t lock used to protect the gate.
2852  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2853  *
2854  * Conditions: Lock must be held. Returns with the lock held.
2855  *             The gate must be closed and the current thread must not already be the holder.
2856  *
2857  */
2858 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2859 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2860 {
2861 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2862 
2863 	gate_steal(gate);
2864 }
2865 
2866 /*
2867  * Name: lck_rw_gate_wait
2868  *
2869  * Description: Waits for the current thread to become the holder of the gate or for the
2870  *              gate to become open. An interruptible mode and deadline can be specified
2871  *              to return earlier from the wait.
2872  *
2873  * Args:
2874  *   Arg1: lck_rw_t lock used to protect the gate.
2875  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
2879  *
2880  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2881  *             Lock will be dropped while waiting.
2882  *             The gate must be closed.
2883  *
2884  * Returns: Reason why the thread was woken up.
2885  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2886  *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
2887  *                         to wake up possible waiters on the gate before returning to userspace.
2888  *          GATE_OPENED - the gate was opened by the holder.
2889  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
2890  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
2891  */
2892 gate_wait_result_t
lck_rw_gate_wait(lck_rw_t * lock,gate_t * gate,lck_sleep_action_t lck_sleep_action,wait_interrupt_t interruptible,uint64_t deadline)2893 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2894 {
2895 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2896 
2897 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2898 
2899 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2900 		return gate_wait(gate,
2901 		           interruptible,
2902 		           deadline,
2903 		           ^{lck_rw_type = lck_rw_done(lock);},
2904 		           ^{;});
2905 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2906 		return gate_wait(gate,
2907 		           interruptible,
2908 		           deadline,
2909 		           ^{lck_rw_type = lck_rw_done(lock);},
2910 		           ^{lck_rw_lock(lock, lck_rw_type);});
2911 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2912 		return gate_wait(gate,
2913 		           interruptible,
2914 		           deadline,
2915 		           ^{lck_rw_type = lck_rw_done(lock);},
2916 		           ^{lck_rw_lock_exclusive(lock);});
2917 	} else {
2918 		return gate_wait(gate,
2919 		           interruptible,
2920 		           deadline,
2921 		           ^{lck_rw_type = lck_rw_done(lock);},
2922 		           ^{lck_rw_lock_shared(lock);});
2923 	}
2924 }
2925 
2926 /*
2927  * Name: lck_rw_gate_assert
2928  *
2929  * Description: asserts that the gate is in the specified state.
2930  *
2931  * Args:
2932  *   Arg1: lck_rw_t lock used to protect the gate.
2933  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2934  *   Arg3: flags to specified assert type.
2935  *         GATE_ASSERT_CLOSED - the gate is currently closed
2936  *         GATE_ASSERT_OPEN - the gate is currently opened
2937  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2938  */
2939 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2940 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2941 {
2942 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2943 
2944 	gate_assert(gate, flags);
2945 	return;
2946 }
2947 
2948 /*
2949  * Name: lck_mtx_gate_init
2950  *
2951  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2952  *
2953  * Args:
2954  *   Arg1: lck_mtx_t lock used to protect the gate.
2955  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2956  */
2957 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2958 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2959 {
2960 	(void) lock;
2961 	gate_init(gate, GT_INIT_DEFAULT);
2962 }
2963 
2964 /*
2965  * Name: lck_mtx_gate_alloc_init
2966  *
2967  * Description: allocates and initializes a gate_t.
2968  *
2969  * Args:
2970  *   Arg1: lck_mtx_t lock used to protect the gate.
2971  *
2972  * Returns:
2973  *         gate_t allocated.
2974  */
2975 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)2976 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
2977 {
2978 	(void) lock;
2979 	return gate_alloc_init();
2980 }
2981 
2982 /*
2983  * Name: lck_mtx_gate_destroy
2984  *
2985  * Description: destroys a variable previously initialized
2986  *              with lck_mtx_gate_init().
2987  *
2988  * Args:
2989  *   Arg1: lck_mtx_t lock used to protect the gate.
2990  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2991  */
2992 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)2993 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
2994 {
2995 	(void) lock;
2996 	gate_destroy(gate);
2997 }
2998 
2999 /*
3000  * Name: lck_mtx_gate_free
3001  *
3002  * Description: destroys and tries to free a gate previously allocated
 *              with lck_mtx_gate_alloc_init().
3004  *              The gate free might be delegated to the last thread returning
3005  *              from the gate_wait().
3006  *
3007  * Args:
3008  *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3010  */
3011 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3012 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3013 {
3014 	(void) lock;
3015 	gate_free(gate);
3016 }
3017 
3018 /*
3019  * Name: lck_mtx_gate_try_close
3020  *
3021  * Description: Tries to close the gate.
3022  *              In case of success the current thread will be set as
3023  *              the holder of the gate.
3024  *
3025  * Args:
3026  *   Arg1: lck_mtx_t lock used to protect the gate.
3027  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3028  *
3029  * Conditions: Lock must be held. Returns with the lock held.
3030  *
3031  * Returns:
3032  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3033  *          of the gate.
3034  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3035  *          to wake up possible waiters on the gate before returning to userspace.
3036  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3037  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3038  *
3039  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3040  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3041  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3042  *          be done without dropping the lock that is protecting the gate in between.
3043  */
3044 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3045 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3046 {
3047 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3048 
3049 	return gate_try_close(gate);
3050 }
3051 
3052 /*
3053  * Name: lck_mtx_gate_close
3054  *
3055  * Description: Closes the gate. The current thread will be set as
3056  *              the holder of the gate. Will panic if the gate is already closed.
3057  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3058  *              to wake up possible waiters on the gate before returning to userspace.
3059  *
3060  * Args:
3061  *   Arg1: lck_mtx_t lock used to protect the gate.
3062  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3063  *
3064  * Conditions: Lock must be held. Returns with the lock held.
3065  *             The gate must be open.
3066  *
3067  */
3068 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3069 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3070 {
3071 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3072 
3073 	return gate_close(gate);
3074 }
3075 
3076 /*
3077  * Name: lck_mtx_gate_open
3078  *
 * Description: Opens the gate and wakes up possible waiters.
3080  *
3081  * Args:
3082  *   Arg1: lck_mtx_t lock used to protect the gate.
3083  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3084  *
3085  * Conditions: Lock must be held. Returns with the lock held.
3086  *             The current thread must be the holder of the gate.
3087  *
3088  */
3089 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3090 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3091 {
3092 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3093 
3094 	gate_open(gate);
3095 }
3096 
3097 /*
3098  * Name: lck_mtx_gate_handoff
3099  *
3100  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3101  *              priority will be selected as the new holder of the gate, and woken up,
3102  *              with the gate remaining in the closed state throughout.
3103  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3104  *              will be returned.
3105  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3106  *              case no waiters were found.
3107  *
3108  *
3109  * Args:
3110  *   Arg1: lck_mtx_t lock used to protect the gate.
3111  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3112  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3113  *
3114  * Conditions: Lock must be held. Returns with the lock held.
3115  *             The current thread must be the holder of the gate.
3116  *
3117  * Returns:
3118  *          KERN_SUCCESS in case one of the waiters became the new holder.
3119  *          KERN_NOT_WAITING in case there were no waiters.
3120  *
3121  */
3122 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3123 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3124 {
3125 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3126 
3127 	return gate_handoff(gate, flags);
3128 }
3129 
3130 /*
3131  * Name: lck_mtx_gate_steal
3132  *
3133  * Description: Steals the ownership of the gate. It sets the current thread as the
3134  *              new holder of the gate.
3135  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3136  *              to wake up possible waiters on the gate before returning to userspace.
3137  *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3138  *              anymore.
3139  *
3140  *
3141  * Args:
3142  *   Arg1: lck_mtx_t lock used to protect the gate.
3143  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3144  *
3145  * Conditions: Lock must be held. Returns with the lock held.
3146  *             The gate must be closed and the current thread must not already be the holder.
3147  *
3148  */
3149 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3150 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3151 {
3152 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3153 
3154 	gate_steal(gate);
3155 }
3156 
3157 /*
3158  * Name: lck_mtx_gate_wait
3159  *
3160  * Description: Waits for the current thread to become the holder of the gate or for the
3161  *              gate to become open. An interruptible mode and deadline can be specified
3162  *              to return earlier from the wait.
3163  *
3164  * Args:
3165  *   Arg1: lck_mtx_t lock used to protect the gate.
3166  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
3170  *
3171  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3172  *             Lock will be dropped while waiting.
3173  *             The gate must be closed.
3174  *
3175  * Returns: Reason why the thread was woken up.
3176  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3177  *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3178  *                         to wake up possible waiters on the gate before returning to userspace.
3179  *          GATE_OPENED - the gate was opened by the holder.
3180  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3181  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3182  */
3183 gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t * lock,gate_t * gate,lck_sleep_action_t lck_sleep_action,wait_interrupt_t interruptible,uint64_t deadline)3184 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3185 {
3186 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3187 
3188 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3189 		return gate_wait(gate,
3190 		           interruptible,
3191 		           deadline,
3192 		           ^{lck_mtx_unlock(lock);},
3193 		           ^{;});
3194 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3195 		return gate_wait(gate,
3196 		           interruptible,
3197 		           deadline,
3198 		           ^{lck_mtx_unlock(lock);},
3199 		           ^{lck_mtx_lock_spin(lock);});
3200 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3201 		return gate_wait(gate,
3202 		           interruptible,
3203 		           deadline,
3204 		           ^{lck_mtx_unlock(lock);},
3205 		           ^{lck_mtx_lock_spin_always(lock);});
3206 	} else {
3207 		return gate_wait(gate,
3208 		           interruptible,
3209 		           deadline,
3210 		           ^{lck_mtx_unlock(lock);},
3211 		           ^{lck_mtx_lock(lock);});
3212 	}
3213 }
3214 
3215 /*
3216  * Name: lck_mtx_gate_assert
3217  *
3218  * Description: asserts that the gate is in the specified state.
3219  *
3220  * Args:
3221  *   Arg1: lck_mtx_t lock used to protect the gate.
3222  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3223  *   Arg3: flags to specified assert type.
3224  *         GATE_ASSERT_CLOSED - the gate is currently closed
3225  *         GATE_ASSERT_OPEN - the gate is currently opened
3226  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3227  */
3228 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3229 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3230 {
3231 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3232 
3233 	gate_assert(gate, flags);
3234 }
3235 
3236 #pragma mark - LCK_*_DECLARE support
3237 
3238 __startup_func
3239 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3240 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3241 {
3242 	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3243 }
3244 
3245 __startup_func
3246 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3247 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3248 {
3249 	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3250 }
3251 
3252 __startup_func
3253 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3254 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3255 {
3256 	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3257 }
3258 
3259 __startup_func
3260 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3261 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3262 {
3263 	simple_lock_init(sp->lck, sp->lck_init_arg);
3264 }
3265 
3266 __startup_func
3267 void
lck_ticket_startup_init(struct lck_ticket_startup_spec * sp)3268 lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
3269 {
3270 	lck_ticket_init(sp->lck, sp->lck_grp);
3271 }
3272