xref: /xnu-8796.101.5/osfmk/kern/locks.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #define LOCK_PRIVATE 1
58 
59 #include <mach_ldebug.h>
60 #include <debug.h>
61 
62 #include <mach/kern_return.h>
63 
64 #include <kern/locks_internal.h>
65 #include <kern/lock_stat.h>
66 #include <kern/locks.h>
67 #include <kern/misc_protos.h>
68 #include <kern/zalloc.h>
69 #include <kern/thread.h>
70 #include <kern/processor.h>
71 #include <kern/sched_prim.h>
72 #include <kern/debug.h>
73 #include <libkern/section_keywords.h>
74 #if defined(__x86_64__)
75 #include <i386/tsc.h>
76 #include <i386/machine_routines.h>
77 #endif
78 #include <machine/atomic.h>
79 #include <machine/machine_cpu.h>
80 #include <string.h>
81 #include <vm/pmap.h>
82 
83 #include <sys/kdebug.h>
84 
85 #define LCK_MTX_SLEEP_CODE              0
86 #define LCK_MTX_SLEEP_DEADLINE_CODE     1
87 #define LCK_MTX_LCK_WAIT_CODE           2
88 #define LCK_MTX_UNLCK_WAKEUP_CODE       3
89 
90 // Panic in tests that check lock usage correctness
// These are undesirable when in a panic or a debugger is running.
92 #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)
93 
94 #if MACH_LDEBUG
95 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
96 #else
97 #define ALIGN_TEST(p, t) do{}while(0)
98 #endif
99 
100 #define NOINLINE                __attribute__((noinline))
101 
102 #define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
103 #define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
104 
/* gate_t objects come from their own kalloc type, with private accounting. */
KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

/*
 * Per-CPU record describing the spinlock this CPU timed out on, filled in
 * by lck_spinlock_timeout_hit() so the panic/debugger path can identify
 * the lock and its owner.
 */
struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

/* When true (the default), a spinlock timeout panics instead of spinning on. */
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

struct lck_tktlock_pv_info PERCPU_DATA(lck_tktlock_pv_info);

#if CONFIG_PV_TICKET
SECURITY_READ_ONLY_LATE(bool) has_lock_pv = FALSE; /* used by waitq.py */
#endif

/* "lcks" boot-arg: lock debugging options; lock debugging on by default for DEBUG. */
#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

#if CONFIG_DTRACE
/* Spin-duration threshold for dtrace lockstat probes — consumers are outside this file. */
#if defined (__x86_64__)
machine_timeout_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm64__)
MACHINE_TIMEOUT(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif

/* NOTE(review): per-CPU lck_mcs node storage; its users live outside this chunk. */
struct lck_mcs PERCPU_DATA(lck_mcs);
134 
135 __kdebug_only
136 uintptr_t
unslide_for_kdebug(const void * object)137 unslide_for_kdebug(const void* object)
138 {
139 	if (__improbable(kdebug_enable)) {
140 		return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 	} else {
142 		return 0;
143 	}
144 }
145 
/*
 * Out-of-line panic helper for __lck_require_preemption_disabled();
 * __abortlike keeps the inline fast path compact.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
151 
/*
 * Assert that @self is running with preemption disabled before taking a
 * "nopreempt" lock; panics (out of line) otherwise.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
159 
160 #pragma mark - HW Spin policies
161 
162 /*
163  * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
164  */
/*
 * Snapshot the spin timeout for @pol into a hw_spin_timeout_t.
 *
 * Reads the policy's timeout atomically and applies hwsp_timeout_shift
 * (a multiplier expressed as a left shift).  With SCHED_HYGIENE_DEBUG it
 * also records whether we are in the PPL and whether interrupts are
 * enabled, and caps the timeout so the spinlock panic fires before the
 * interrupt-masked hygiene panic would.
 */
__attribute__((always_inline))
hw_spin_timeout_t
hw_spin_compute_timeout(hw_spin_policy_t pol)
{
	hw_spin_timeout_t ret = {
		.hwst_timeout = os_atomic_load(pol->hwsp_timeout, relaxed),
	};

	ret.hwst_timeout <<= pol->hwsp_timeout_shift;
#if SCHED_HYGIENE_DEBUG
	ret.hwst_in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	ret.hwst_interruptible = !ret.hwst_in_ppl && ml_get_interrupts_enabled();
#endif /* SCHED_HYGIENE_DEBUG */

#if SCHED_HYGIENE_DEBUG
#ifndef KASAN
	/*
	 * Clamp to the interrupt-masked timeout (converted to TSC units on
	 * x86) when spinning uninterruptible outside the PPL in panic mode,
	 * so this path panics first with better diagnostics.
	 */
	if (ret.hwst_timeout > 0 &&
	    !ret.hwst_in_ppl &&
	    !ret.hwst_interruptible &&
	    interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
		uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);

#if defined(__x86_64__)
		int_timeout = tmrCvt(int_timeout, tscFCvtn2t);
#endif
		if (int_timeout < ret.hwst_timeout) {
			ret.hwst_timeout = int_timeout;
		}
	}
#endif /* !KASAN */
#endif /* SCHED_HYGIENE_DEBUG */

	return ret;
}
200 
/*
 * Whether the spin attempt described by @to runs inside the PPL.
 * Uses the value cached by hw_spin_compute_timeout() when available,
 * otherwise queries the pmap directly.
 */
__attribute__((always_inline))
bool
hw_spin_in_ppl(hw_spin_timeout_t to)
{
#if SCHED_HYGIENE_DEBUG
	return to.hwst_in_ppl;
#else
	(void)to;
	return pmap_in_ppl();
#endif
}
212 
/*
 * Decide whether a spin loop should keep spinning.
 *
 * Returns true while within the deadline recorded in @state (or always,
 * when the policy timeout is 0).  On the first call past the fast path,
 * arms the deadline.  Once the deadline expires, invokes the policy's
 * hwsp_op_timeout callback, which either panics, asks to keep spinning
 * (HW_LOCK_TIMEOUT_CONTINUE, which pushes the deadline), or tells the
 * caller to give up.
 */
bool
hw_spin_should_keep_spinning(
	void                   *lock,
	hw_spin_policy_t        pol,
	hw_spin_timeout_t       to,
	hw_spin_state_t        *state)
{
	hw_spin_timeout_status_t rc;
#if SCHED_HYGIENE_DEBUG
	uint64_t irq_time = 0;
#endif
	uint64_t now;

	if (__improbable(to.hwst_timeout == 0)) {
		/* a zero timeout means spin forever */
		return true;
	}

	now = ml_get_timebase();
	if (__probable(now < state->hwss_deadline)) {
		/* keep spinning */
		return true;
	}

#if SCHED_HYGIENE_DEBUG
	if (to.hwst_interruptible) {
		irq_time = current_thread()->machine.int_time_mt;
	}
#endif /* SCHED_HYGIENE_DEBUG */

	if (__probable(state->hwss_deadline == 0)) {
		/* first time through: arm the deadline instead of checking it */
		state->hwss_start     = now;
		state->hwss_deadline  = now + to.hwst_timeout;
#if SCHED_HYGIENE_DEBUG
		state->hwss_irq_start = irq_time;
#endif
		return true;
	}

	/*
	 * Update fields that the callback needs
	 */
	state->hwss_now     = now;
#if SCHED_HYGIENE_DEBUG
	state->hwss_irq_end = irq_time;
#endif /* SCHED_HYGIENE_DEBUG */

	/* hwsp_lock_offset lets callers pass an interior pointer; recover the lock */
	rc = pol->hwsp_op_timeout((char *)lock - pol->hwsp_lock_offset,
	    to, *state);
	if (rc == HW_LOCK_TIMEOUT_CONTINUE) {
		/* push the deadline */
		state->hwss_deadline += to.hwst_timeout;
	}
	return rc == HW_LOCK_TIMEOUT_CONTINUE;
}
267 
/*
 * Remember (DEBUG/DEVELOPMENT only) the owner observed when a contended
 * spin started, with the low 3 bits (lock-implementation flags) stripped.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
278 
/*
 * Same as lck_spinlock_timeout_set_orig_owner(), but for locks that store
 * a compressed thread ID (ctid) instead of a thread pointer.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_ctid(uint32_t ctid)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig =
	    (uintptr_t)ctid_get_thread_unsafe(ctid);
#else
	(void)ctid;
#endif
}
290 
/*
 * Record diagnostic state when a spinlock timeout fires.
 *
 * Resolves @owner — either a ctid (values below 1 << CTID_SIZE_BIT) or a
 * thread pointer with implementation bits in the low 3 bits — publishes it
 * in this CPU's lck_spinlock_to_info record, and scans the active CPUs for
 * the one running the owner (on x86, that CPU is NMI'd so it panics with
 * its own backtrace).  Returns the per-CPU record for the caller's panic.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	if (owner < (1u << CTID_SIZE_BIT)) {
		/* small values are compressed thread IDs, not pointers */
		owner = (uintptr_t)ctid_get_thread_unsafe((uint32_t)owner);
	} else {
		/* strip possible bits used by the lock implementations */
		owner &= ~0x7ul;
	}

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
331 
332 #pragma mark - HW locks
333 
334 /*
335  * Routine:	hw_lock_init
336  *
337  *	Initialize a hardware lock.
338  */
339 MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)340 hw_lock_init(hw_lock_t lock)
341 {
342 	ordered_store_hw(lock, 0);
343 }
344 
/*
 * Single acquisition attempt for a contended hw_lock.
 *
 * On LL/SC architectures the lock word is loaded exclusive so that a
 * failed attempt can wait_for_event() — sleeping until the monitored
 * cacheline changes — instead of pure busy-waiting.
 * Returns true iff the lock was taken with @newval.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#endif
	return lock_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
369 
/*
 * Single attempt to set bit @bit in *@target as a lock bit.
 *
 * Returns true iff the bit transitioned 0 -> 1 with acquire ordering.
 * When the bit is already set: with @wait, parks on the exclusive monitor
 * (or cpu_pause() without LL/SC); without @wait, just drops the monitor.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
		                os_atomic_clear_exclusive();
			}
#else
		        if (wait) {
		                cpu_pause();
			}
#endif
		        return false;
		}
	});
	return true;
#else
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* atomic OR; we won only if the bit was clear before */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
409 
/*
 * Default spin-timeout handler for hw_lock_t: panic with owner details,
 * unless spinlock_timeout_panic was cleared (e.g. from a debugger), in
 * which case the caller keeps spinning.
 */
static hw_spin_timeout_status_t
hw_spin_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_t lock  = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul; /* strip lock flag bits */
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
		    "current owner: %p, " HW_SPIN_TIMEOUT_DETAILS_FMT,
		    lock, HW_SPIN_TIMEOUT_ARG(to, st),
		    (void *)owner, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
448 
/* Default hw_lock_t policy: panic once lock_panic_timeout expires. */
const struct hw_spin_policy hw_lock_spin_policy = {
	.hwsp_name              = "hw_lock_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_spin_timeout_panic,
};
454 
455 static hw_spin_timeout_status_t
hw_spin_always_return(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)456 hw_spin_always_return(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
457 {
458 #pragma unused(_lock, to, st)
459 	return HW_LOCK_TIMEOUT_RETURN;
460 }
461 
/*
 * Policy for hw_lock_t acquisitions on panic paths (per its name): waits
 * 4x the regular lock timeout, then gives up instead of panicking again.
 */
const struct hw_spin_policy hw_lock_spin_panic_policy = {
	.hwsp_name              = "hw_lock_t[panic]",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};
472 
#if DEBUG || DEVELOPMENT
/* 100us test timeout, converted to timebase/TSC units at boot (below). */
static machine_timeout_t hw_lock_test_to;

/* Testing policy: give up (no panic) after 4x the regular lock timeout. */
const struct hw_spin_policy hw_lock_test_give_up_policy = {
	.hwsp_name              = "testing policy",
#if defined(__x86_64__)
	.hwsp_timeout           = &LockTimeOutTSC,
#else
	.hwsp_timeout_atomic    = &LockTimeOut,
#endif
	.hwsp_timeout_shift     = 2,
	.hwsp_op_timeout        = hw_spin_always_return,
};

/* Boot-time init: express 100us in mach_absolute_time (TSC on x86) units. */
__startup_func
static void
hw_lock_test_to_init(void)
{
	uint64_t timeout;

	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout);
#if defined(__x86_64__)
	timeout = tmrCvt(timeout, tscFCvtn2t);
#endif
	os_atomic_init(&hw_lock_test_to, timeout);
}
STARTUP(TIMEOUTS, STARTUP_RANK_FIRST, hw_lock_test_to_init);
#endif
500 
/*
 * Spin-timeout handler for bit locks: panic with the current lock word
 * (bit locks don't track an owner), unless spinlock_timeout_panic is off.
 */
static hw_spin_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	hw_lock_bit_t *lock = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lock, HW_SPIN_TIMEOUT_ARG(to, st),
	    *lock, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
516 
/* Default bit-lock policy: panic once lock_panic_timeout expires. */
static const struct hw_spin_policy hw_lock_bit_policy = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
522 
#if __arm64__
/* 0x3000000 timebase ticks ≈ 2s (assuming the 24MHz ARM timebase). */
const uint64_t hw_lock_bit_timeout_2s = 0x3000000;
const struct hw_spin_policy hw_lock_bit_policy_2s = {
	.hwsp_name              = "hw_lock_bit_t",
	.hwsp_timeout           = &hw_lock_bit_timeout_2s,
	.hwsp_op_timeout        = hw_lock_bit_timeout_panic,
};
#endif
531 
532 /*
533  *	Routine: hw_lock_lock_contended
534  *
535  *	Spin until lock is acquired or timeout expires.
536  *	timeout is in mach_absolute_time ticks. Called with
537  *	preemption disabled.
538  */
/*
 * Slow path for hw_lock acquisition: spin (in LOCK_SNOOP_SPINS batches)
 * until the lock is acquired or the policy's timeout handler gives up.
 * @data is the lock word to install (the owning thread's state).
 * Returns HW_LOCK_ACQUIRED or HW_LOCK_CONTENDED.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(
	hw_lock_t               lock,
	uintptr_t               data,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

	/* self-deadlock check: are we already the owner? */
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) ==
	    HW_LOCK_STATE_TO_THREAD(data) && LOCK_CORRECTNESS_PANIC()) {
		panic("hwlock: thread %p is trying to lock %p recursively",
		    HW_LOCK_STATE_TO_THREAD(data), lock);
	}

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!hw_spin_in_ppl(to)) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

	/* batch attempts between (relatively expensive) deadline checks */
	do {
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				rc = HW_LOCK_ACQUIRED;
				goto end;
			}
		}
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
593 
/*
 * Timeout handler for hw_wait_while_equals32(): panic with the word's
 * current value, unless spinlock_timeout_panic was cleared.
 */
static hw_spin_timeout_status_t
hw_wait_while_equals32_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint32_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals32[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%08x, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
609 
/* Policy backing hw_wait_while_equals32(): panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_wait_while_equals32_policy = {
	.hwsp_name              = "hw_wait_while_equals32",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals32_panic,
};
615 
/* 64-bit counterpart of hw_wait_while_equals32_panic(). */
static hw_spin_timeout_status_t
hw_wait_while_equals64_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	uint64_t *address = _lock;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("wait_while_equals64[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current value: 0x%016llx, " HW_SPIN_TIMEOUT_DETAILS_FMT,
	    address, HW_SPIN_TIMEOUT_ARG(to, st),
	    *address, HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
631 
/* Policy backing hw_wait_while_equals64(): panic after lock_panic_timeout. */
static const struct hw_spin_policy hw_wait_while_equals64_policy = {
	.hwsp_name              = "hw_wait_while_equals64",
	.hwsp_timeout_atomic    = &lock_panic_timeout,
	.hwsp_op_timeout        = hw_wait_while_equals64_panic,
};
637 
/*
 * Spin until *@address differs from @current, returning the new value.
 * Panics (via the policy above) if the word never changes in time.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	hw_spin_policy_t  pol   = &hw_wait_while_equals32_policy;
	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint32_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		/* return value ignored: the handler either panics or pushes the deadline */
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
652 
/* 64-bit counterpart of hw_wait_while_equals32(). */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	hw_spin_policy_t  pol   = &hw_wait_while_equals64_policy;
	hw_spin_timeout_t to    = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	uint64_t          v;

	while (__improbable(!hw_spin_wait_until(address, v, v != current))) {
		/* return value ignored: the handler either panics or pushes the deadline */
		hw_spin_should_keep_spinning(address, pol, to, &state);
	}

	return v;
}
667 
/*
 * Common acquisition body for the hw_lock_lock*/hw_lock_to* entry points:
 * one fast-path attempt, then the contended slow path under @pol.
 * Preemption must already be disabled by the caller.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(
	hw_lock_t               lock,
	thread_t                thread,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = HW_LOCK_THREAD_TO_STATE(thread);

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, state, pol LCK_GRP_ARG(grp));
}
685 
686 /*
687  *	Routine: hw_lock_lock
688  *
689  *	Acquire lock, spinning until it becomes available,
690  *	return with preemption disabled.
691  */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* preemption stays disabled until hw_lock_unlock() */
	lock_disable_preemption_for_thread(thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
700 
701 /*
702  *	Routine: hw_lock_lock_nopreempt
703  *
704  *	Acquire lock, spinning until it becomes available.
705  */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* caller must already have preemption disabled; panics otherwise */
	__lck_require_preemption_disabled(lock, thread);
	(void)hw_lock_to_internal(lock, thread, &hw_lock_spin_policy
	    LCK_GRP_ARG(grp));
}
714 
715 /*
716  *	Routine: hw_lock_to
717  *
718  *	Acquire lock, spinning until it becomes available or timeout.
719  *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
720  *	preemption disabled.
721  */
unsigned
int
(hw_lock_to)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	/* HW_LOCK_ACQUIRED/HW_LOCK_CONTENDED narrowed to unsigned for the ABI */
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
730 
731 /*
732  *	Routine: hw_lock_to_nopreempt
733  *
734  *	Acquire lock, spinning until it becomes available or timeout.
735  *	Timeout is in mach_absolute_time ticks, called and return with
736  *	preemption disabled.
737  */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, hw_spin_policy_t pol LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* caller must already have preemption disabled; panics otherwise */
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, pol LCK_GRP_ARG(grp));
}
746 
/*
 * Single non-blocking acquisition attempt; true on success.
 * Preemption must already be disabled by the caller.
 */
__result_use_check
static inline unsigned int
hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(lock_cmpxchg(&lock->lock_data, 0,
	    HW_LOCK_THREAD_TO_STATE(thread), acquire))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return true;
	}
	return false;
}
758 
759 /*
760  *	Routine: hw_lock_try
761  *
762  *	returns with preemption disabled on success.
763  */
764 unsigned
765 int
766 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
767 {
768 	thread_t thread = current_thread();
769 	lock_disable_preemption_for_thread(thread);
770 	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
771 	if (!success) {
772 		lock_enable_preemption();
773 	}
774 	return success;
775 }
776 
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	/* caller must already have preemption disabled; panics otherwise */
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
785 
786 #if DEBUG || DEVELOPMENT
787 __abortlike
788 static void
__hw_lock_unlock_unowned_panic(hw_lock_t lock)789 __hw_lock_unlock_unowned_panic(hw_lock_t lock)
790 {
791 	panic("hwlock: thread %p is trying to lock %p recursively",
792 	    current_thread(), lock);
793 }
794 #endif /* DEBUG || DEVELOPMENT */
795 
796 /*
797  *	Routine: hw_lock_unlock
798  *
799  *	Unconditionally release lock, release preemption level.
800  */
/*
 * Release @lock with a release-ordered store of 0.  On DEBUG/DEVELOPMENT,
 * panics first if the caller isn't the recorded owner (unless a debugger
 * is active — see LOCK_CORRECTNESS_PANIC()).
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
#if DEBUG || DEVELOPMENT
	if (HW_LOCK_STATE_TO_THREAD(lock->lock_data) != current_thread() &&
	    LOCK_CORRECTNESS_PANIC()) {
		__hw_lock_unlock_unowned_panic(lock);
	}
#endif /* DEBUG || DEVELOPMENT */

	os_atomic_store(&lock->lock_data, 0, release);
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
816 
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	/* balance the disable taken at hw_lock_lock()/hw_lock_try() time */
	lock_enable_preemption();
}
823 
/* Release variant that leaves the caller's preemption level untouched. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
829 
/*
 * Assert ownership of @lock (MACH_ASSERT kernels only; no-op otherwise).
 * @type is LCK_ASSERT_OWNED or LCK_ASSERT_NOTOWNED; anything else panics.
 */
void
hw_lock_assert(__assert_only hw_lock_t lock, __assert_only unsigned int type)
{
#if MACH_ASSERT
	thread_t thread, holder;

	holder = HW_LOCK_STATE_TO_THREAD(lock->lock_data);
	thread = current_thread();

	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0) {
			panic("Lock not owned %p = %p", lock, holder);
		}
		if (holder != thread) {
			panic("Lock not owned by current thread %p = %p", lock, holder);
		}
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != THREAD_NULL && holder == thread) {
			panic("Lock owned by current thread %p = %p", lock, holder);
		}
	} else {
		panic("hw_lock_assert(): invalid arg (%u)", type);
	}
#endif /* MACH_ASSERT */
}
855 
856 /*
857  *	Routine hw_lock_held, doesn't change preemption state.
858  *	N.B.  Racy, of course.
859  */
860 unsigned int
hw_lock_held(hw_lock_t lock)861 hw_lock_held(hw_lock_t lock)
862 {
863 	return ordered_load_hw(lock) != 0;
864 }
865 
/*
 * Slow path for bit-lock acquisition: spin (in LOCK_SNOOP_SPINS batches)
 * until the bit is taken or the policy's timeout handler gives up.
 * Returns HW_LOCK_ACQUIRED or HW_LOCK_CONTENDED.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t          *lock,
	uint32_t                bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t   state = { };
	hw_lock_status_t  rc = HW_LOCK_CONTENDED;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

	do {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			/*
			 * NOTE(review): hw_lock_trylock_bit() returns bool;
			 * storing it in hw_lock_status_t relies on
			 * false/true matching HW_LOCK_CONTENDED/HW_LOCK_ACQUIRED
			 * numerically — confirm against the enum definition.
			 */
			rc = hw_lock_trylock_bit(lock, bit, true);

			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}

		assert(rc == HW_LOCK_CONTENDED);
	} while (hw_spin_should_keep_spinning(lock, pol, to, &state));

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LCK_GRP_STAT */
	return rc;
}
909 
/*
 * Common acquisition body for the hw_lock_bit* entry points: one
 * fast-path attempt, then the contended slow path under @pol.
 * Preemption must already be disabled by the caller.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(
	hw_lock_bit_t          *lock,
	unsigned int            bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, pol LCK_GRP_ARG(grp));
}
925 
926 /*
927  *	Routine: hw_lock_bit_to
928  *
929  *	Acquire bit lock, spinning until it becomes available or timeout.
930  *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
931  *	preemption disabled.
932  */
unsigned
int
(hw_lock_bit_to)(
	hw_lock_bit_t          * lock,
	uint32_t                bit,
	hw_spin_policy_t        pol
	LCK_GRP_ARG(lck_grp_t *grp))
{
	/* preemption stays disabled until hw_unlock_bit() */
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, pol LCK_GRP_ARG(grp));
}
944 
945 /*
946  *	Routine: hw_lock_bit
947  *
948  *	Acquire bit lock, spinning until it becomes available,
949  *	return with preemption disabled.
950  */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	/* preemption stays disabled until hw_unlock_bit() */
	_disable_preemption();
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
957 
958 /*
959  *	Routine: hw_lock_bit_nopreempt
960  *
961  *	Acquire bit lock, spinning until it becomes available.
962  */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	/* caller must already have preemption disabled; panics otherwise */
	__lck_require_preemption_disabled(lock, current_thread());
	(void)hw_lock_bit_to_internal(lock, bit, &hw_lock_bit_policy LCK_GRP_ARG(grp));
}
969 
970 
971 unsigned
972 int
973 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
974 {
975 	boolean_t success = false;
976 
977 	_disable_preemption();
978 	success = hw_lock_trylock_bit(lock, bit, false);
979 	if (!success) {
980 		lock_enable_preemption();
981 	}
982 
983 	if (success) {
984 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
985 	}
986 
987 	return success;
988 }
989 
990 static inline void
hw_unlock_bit_internal(hw_lock_bit_t * lock,unsigned int bit)991 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
992 {
993 	os_atomic_andnot(lock, 1u << bit, release);
994 #if CONFIG_DTRACE
995 	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
996 #endif
997 }
998 
999 /*
1000  *	Routine:	hw_unlock_bit
1001  *
1002  *		Release spin-lock. The second parameter is the bit number to test and set.
1003  *		Decrement the preemption level.
1004  */
1005 void
hw_unlock_bit(hw_lock_bit_t * lock,unsigned int bit)1006 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
1007 {
1008 	hw_unlock_bit_internal(lock, bit);
1009 	lock_enable_preemption();
1010 }
1011 
1012 void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock,unsigned int bit)1013 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
1014 {
1015 	__lck_require_preemption_disabled(lock, current_thread());
1016 	hw_unlock_bit_internal(lock, bit);
1017 }
1018 
1019 
1020 #pragma mark - lck_*_sleep
1021 
1022 /*
1023  * Routine:	lck_spin_sleep
1024  */
1025 wait_result_t
lck_spin_sleep_grp(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,lck_grp_t * grp)1026 lck_spin_sleep_grp(
1027 	lck_spin_t              *lck,
1028 	lck_sleep_action_t      lck_sleep_action,
1029 	event_t                 event,
1030 	wait_interrupt_t        interruptible,
1031 	lck_grp_t               *grp)
1032 {
1033 	wait_result_t   res;
1034 
1035 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1036 		panic("Invalid lock sleep action %x", lck_sleep_action);
1037 	}
1038 
1039 	res = assert_wait(event, interruptible);
1040 	if (res == THREAD_WAITING) {
1041 		lck_spin_unlock(lck);
1042 		res = thread_block(THREAD_CONTINUE_NULL);
1043 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1044 			lck_spin_lock_grp(lck, grp);
1045 		}
1046 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1047 		lck_spin_unlock(lck);
1048 	}
1049 
1050 	return res;
1051 }
1052 
1053 wait_result_t
lck_spin_sleep(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible)1054 lck_spin_sleep(
1055 	lck_spin_t              *lck,
1056 	lck_sleep_action_t      lck_sleep_action,
1057 	event_t                 event,
1058 	wait_interrupt_t        interruptible)
1059 {
1060 	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
1061 }
1062 
1063 /*
1064  * Routine:	lck_spin_sleep_deadline
1065  */
1066 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1067 lck_spin_sleep_deadline(
1068 	lck_spin_t              *lck,
1069 	lck_sleep_action_t      lck_sleep_action,
1070 	event_t                 event,
1071 	wait_interrupt_t        interruptible,
1072 	uint64_t                deadline)
1073 {
1074 	wait_result_t   res;
1075 
1076 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1077 		panic("Invalid lock sleep action %x", lck_sleep_action);
1078 	}
1079 
1080 	res = assert_wait_deadline(event, interruptible, deadline);
1081 	if (res == THREAD_WAITING) {
1082 		lck_spin_unlock(lck);
1083 		res = thread_block(THREAD_CONTINUE_NULL);
1084 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1085 			lck_spin_lock(lck);
1086 		}
1087 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1088 		lck_spin_unlock(lck);
1089 	}
1090 
1091 	return res;
1092 }
1093 
1094 /*
1095  * Routine:	lck_mtx_sleep
1096  */
1097 wait_result_t
lck_mtx_sleep(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible)1098 lck_mtx_sleep(
1099 	lck_mtx_t               *lck,
1100 	lck_sleep_action_t      lck_sleep_action,
1101 	event_t                 event,
1102 	wait_interrupt_t        interruptible)
1103 {
1104 	wait_result_t           res;
1105 	thread_pri_floor_t      token;
1106 
1107 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1108 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1109 
1110 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1111 		panic("Invalid lock sleep action %x", lck_sleep_action);
1112 	}
1113 
1114 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1115 		/*
1116 		 * We get a priority floor
1117 		 * during the time that this thread is asleep, so that when it
1118 		 * is re-awakened (and not yet contending on the mutex), it is
1119 		 * runnable at a reasonably high priority.
1120 		 */
1121 		token = thread_priority_floor_start();
1122 	}
1123 
1124 	res = assert_wait(event, interruptible);
1125 	if (res == THREAD_WAITING) {
1126 		lck_mtx_unlock(lck);
1127 		res = thread_block(THREAD_CONTINUE_NULL);
1128 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1129 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1130 				lck_mtx_lock_spin(lck);
1131 			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1132 				lck_mtx_lock_spin_always(lck);
1133 			} else {
1134 				lck_mtx_lock(lck);
1135 			}
1136 		}
1137 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1138 		lck_mtx_unlock(lck);
1139 	}
1140 
1141 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1142 		thread_priority_floor_end(&token);
1143 	}
1144 
1145 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1146 
1147 	return res;
1148 }
1149 
1150 
1151 /*
1152  * Routine:	lck_mtx_sleep_deadline
1153  */
1154 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1155 lck_mtx_sleep_deadline(
1156 	lck_mtx_t               *lck,
1157 	lck_sleep_action_t      lck_sleep_action,
1158 	event_t                 event,
1159 	wait_interrupt_t        interruptible,
1160 	uint64_t                deadline)
1161 {
1162 	wait_result_t           res;
1163 	thread_pri_floor_t      token;
1164 
1165 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1166 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1167 
1168 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1169 		panic("Invalid lock sleep action %x", lck_sleep_action);
1170 	}
1171 
1172 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1173 		/*
1174 		 * See lck_mtx_sleep().
1175 		 */
1176 		token = thread_priority_floor_start();
1177 	}
1178 
1179 	res = assert_wait_deadline(event, interruptible, deadline);
1180 	if (res == THREAD_WAITING) {
1181 		lck_mtx_unlock(lck);
1182 		res = thread_block(THREAD_CONTINUE_NULL);
1183 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1184 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1185 				lck_mtx_lock_spin(lck);
1186 			} else {
1187 				lck_mtx_lock(lck);
1188 			}
1189 		}
1190 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1191 		lck_mtx_unlock(lck);
1192 	}
1193 
1194 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1195 		thread_priority_floor_end(&token);
1196 	}
1197 
1198 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1199 
1200 	return res;
1201 }
1202 
1203 /*
1204  * sleep_with_inheritor and wakeup_with_inheritor KPI
1205  *
1206  * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1207  * the latest thread specified as inheritor.
1208  *
 * The inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom to
 * direct the push. The inheritor cannot return to user space or exit while holding a push from an event. Therefore it is the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1212  *
 * sleep_with_inheritor requires a locking primitive to be held while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1214  *
 * Turnstiles require a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation; therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1219  *
 * All functions will save the turnstile associated with the event in the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a separate turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without interrupts disabled.
1223  * Also:
1224  * - all events on the system that hash on the same bucket will contend on the same spinlock.
1225  * - every event will have a dedicated wait_queue.
1226  *
1227  * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1228  * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1229  */
1230 
/*
 * Routine: wakeup_with_inheritor_and_turnstile
 *
 * Wake up one (wake_one == true) or all threads waiting on `event`
 * through the turnstile kernel hash table.  In the wake-one case, the
 * turnstile push is transferred to the woken thread unless `action` is
 * LCK_WAKE_DO_NOT_TRANSFER_PUSH; if a thread was woken KERN_SUCCESS is
 * returned, KERN_NOT_WAITING otherwise.  In the wake-all case the
 * result of waitq_wakeup64_all() is returned.  When `thread_wokenup`
 * is non-NULL the woken thread reference is handed to the caller;
 * otherwise it is released here.
 */
static kern_return_t
wakeup_with_inheritor_and_turnstile(
	event_t                 event,
	wait_result_t           result,
	bool                    wake_one,
	lck_wake_action_t       action,
	thread_t               *thread_wokenup)
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;
	thread_t wokeup;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (wake_one) {
		waitq_wakeup_flags_t flags = WAITQ_WAKEUP_DEFAULT;

		if (action == LCK_WAKE_DEFAULT) {
			flags = WAITQ_UPDATE_INHERITOR;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
		}

		/*
		 * WAITQ_UPDATE_INHERITOR will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		wokeup = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(event), result, flags);
		if (wokeup != NULL) {
			if (thread_wokenup != NULL) {
				/* hand the thread reference to the caller */
				*thread_wokenup = wokeup;
			} else {
				thread_deallocate_safe(wokeup);
			}
			ret = KERN_SUCCESS;
			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
				goto complete;
			}
		} else {
			if (thread_wokenup != NULL) {
				*thread_wokenup = NULL;
			}
			/* nobody was waiting: clear the inheritor */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			ret = KERN_NOT_WAITING;
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event),
		    result, WAITQ_UPDATE_INHERITOR);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor is either NULL, or a thread that has just been
	 * woken up and has not blocked because it is racing with the same interlock
	 * used here after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor,
	 * the old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We could anyway optimize this by asking the turnstile to tell us
	 * if the old inheritor needs an update, and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1321 
/*
 * Routine: sleep_with_inheritor_and_turnstile
 *
 * Common implementation for the *_sleep_with_inheritor() variants:
 * block the calling thread on `event`, pushing the thread's priority
 * onto `inheritor` through a turnstile taken from the kernel hash
 * table.  primitive_unlock() is invoked after the hash bucket lock
 * (the turnstile interlock) has been taken, so dropping the caller's
 * lock cannot race with a matching wakeup; primitive_lock() is invoked
 * before returning to restore the caller's locking state.
 */
static wait_result_t
sleep_with_inheritor_and_turnstile(
	event_t                 event,
	thread_t                inheritor,
	wait_interrupt_t        interruptible,
	uint64_t                deadline,
	void                  (^primitive_lock)(void),
	void                  (^primitive_unlock)(void))
{
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	/* pairs with turnstile_prepare_hash above */
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* reacquire the caller's primitive before returning */
	primitive_lock();

	return ret;
}
1378 
1379 /*
1380  * change_sleep_inheritor is independent from the locking primitive.
1381  */
1382 
1383 /*
1384  * Name: change_sleep_inheritor
1385  *
1386  * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1387  *
1388  * Args:
1389  *   Arg1: event to redirect the push.
1390  *   Arg2: new inheritor for event.
1391  *
1392  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1393  *
1394  * Conditions: In case of success, the new inheritor cannot return to user space or exit until another inheritor is specified for the event or a
1395  *             wakeup for the event is called.
1396  *             NOTE: this cannot be called from interrupt context.
1397  */
kern_return_t
change_sleep_inheritor(event_t event, thread_t inheritor)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret =  KERN_SUCCESS;
	turnstile_type_t type = TURNSTILE_SLEEP_INHERITOR;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare_hash((uintptr_t)event, type);

	if (!turnstile_has_waiters(ts)) {
		/* report it, but still redirect the push below */
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	/* pairs with turnstile_prepare_hash above */
	turnstile_complete_hash((uintptr_t)event, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1439 
1440 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1441 lck_spin_sleep_with_inheritor(
1442 	lck_spin_t *lock,
1443 	lck_sleep_action_t lck_sleep_action,
1444 	event_t event,
1445 	thread_t inheritor,
1446 	wait_interrupt_t interruptible,
1447 	uint64_t deadline)
1448 {
1449 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1450 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1451 		           interruptible, deadline,
1452 		           ^{}, ^{ lck_spin_unlock(lock); });
1453 	} else {
1454 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1455 		           interruptible, deadline,
1456 		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1457 	}
1458 }
1459 
1460 wait_result_t
hw_lck_ticket_sleep_with_inheritor(hw_lck_ticket_t * lock,lck_grp_t * grp __unused,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1461 hw_lck_ticket_sleep_with_inheritor(
1462 	hw_lck_ticket_t *lock,
1463 	lck_grp_t *grp __unused,
1464 	lck_sleep_action_t lck_sleep_action,
1465 	event_t event,
1466 	thread_t inheritor,
1467 	wait_interrupt_t interruptible,
1468 	uint64_t deadline)
1469 {
1470 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1471 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1472 		           interruptible, deadline,
1473 		           ^{}, ^{ hw_lck_ticket_unlock(lock); });
1474 	} else {
1475 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1476 		           interruptible, deadline,
1477 		           ^{ hw_lck_ticket_lock(lock, grp); }, ^{ hw_lck_ticket_unlock(lock); });
1478 	}
1479 }
1480 
1481 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1482 lck_ticket_sleep_with_inheritor(
1483 	lck_ticket_t *lock,
1484 	lck_grp_t *grp,
1485 	lck_sleep_action_t lck_sleep_action,
1486 	event_t event,
1487 	thread_t inheritor,
1488 	wait_interrupt_t interruptible,
1489 	uint64_t deadline)
1490 {
1491 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1492 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1493 		           interruptible, deadline,
1494 		           ^{}, ^{ lck_ticket_unlock(lock); });
1495 	} else {
1496 		return sleep_with_inheritor_and_turnstile(event, inheritor,
1497 		           interruptible, deadline,
1498 		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1499 	}
1500 }
1501 
1502 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1503 lck_mtx_sleep_with_inheritor(
1504 	lck_mtx_t              *lock,
1505 	lck_sleep_action_t      lck_sleep_action,
1506 	event_t                 event,
1507 	thread_t                inheritor,
1508 	wait_interrupt_t        interruptible,
1509 	uint64_t                deadline)
1510 {
1511 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1512 
1513 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1514 		return sleep_with_inheritor_and_turnstile(event,
1515 		           inheritor,
1516 		           interruptible,
1517 		           deadline,
1518 		           ^{;},
1519 		           ^{lck_mtx_unlock(lock);});
1520 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1521 		return sleep_with_inheritor_and_turnstile(event,
1522 		           inheritor,
1523 		           interruptible,
1524 		           deadline,
1525 		           ^{lck_mtx_lock_spin(lock);},
1526 		           ^{lck_mtx_unlock(lock);});
1527 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1528 		return sleep_with_inheritor_and_turnstile(event,
1529 		           inheritor,
1530 		           interruptible,
1531 		           deadline,
1532 		           ^{lck_mtx_lock_spin_always(lock);},
1533 		           ^{lck_mtx_unlock(lock);});
1534 	} else {
1535 		return sleep_with_inheritor_and_turnstile(event,
1536 		           inheritor,
1537 		           interruptible,
1538 		           deadline,
1539 		           ^{lck_mtx_lock(lock);},
1540 		           ^{lck_mtx_unlock(lock);});
1541 	}
1542 }
1543 
1544 /*
1545  * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1546  */
1547 
1548 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1549 lck_rw_sleep_with_inheritor(
1550 	lck_rw_t               *lock,
1551 	lck_sleep_action_t      lck_sleep_action,
1552 	event_t                 event,
1553 	thread_t                inheritor,
1554 	wait_interrupt_t        interruptible,
1555 	uint64_t                deadline)
1556 {
1557 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1558 
1559 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1560 
1561 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1562 		return sleep_with_inheritor_and_turnstile(event,
1563 		           inheritor,
1564 		           interruptible,
1565 		           deadline,
1566 		           ^{;},
1567 		           ^{lck_rw_type = lck_rw_done(lock);});
1568 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1569 		return sleep_with_inheritor_and_turnstile(event,
1570 		           inheritor,
1571 		           interruptible,
1572 		           deadline,
1573 		           ^{lck_rw_lock(lock, lck_rw_type);},
1574 		           ^{lck_rw_type = lck_rw_done(lock);});
1575 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1576 		return sleep_with_inheritor_and_turnstile(event,
1577 		           inheritor,
1578 		           interruptible,
1579 		           deadline,
1580 		           ^{lck_rw_lock_exclusive(lock);},
1581 		           ^{lck_rw_type = lck_rw_done(lock);});
1582 	} else {
1583 		return sleep_with_inheritor_and_turnstile(event,
1584 		           inheritor,
1585 		           interruptible,
1586 		           deadline,
1587 		           ^{lck_rw_lock_shared(lock);},
1588 		           ^{lck_rw_type = lck_rw_done(lock);});
1589 	}
1590 }
1591 
1592 /*
1593  * wakeup_with_inheritor functions are independent from the locking primitive.
1594  */
1595 
1596 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1597 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1598 {
1599 	return wakeup_with_inheritor_and_turnstile(event,
1600 	           result,
1601 	           TRUE,
1602 	           action,
1603 	           thread_wokenup);
1604 }
1605 
1606 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1607 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1608 {
1609 	return wakeup_with_inheritor_and_turnstile(event,
1610 	           result,
1611 	           FALSE,
1612 	           0,
1613 	           NULL);
1614 }
1615 
1616 void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq,__unused event64_t event,thread_waitinfo_t * waitinfo)1617 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
1618 {
1619 	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
1620 	assert(waitq_type(waitq) == WQT_TURNSTILE);
1621 	waitinfo->owner = 0;
1622 	waitinfo->context = 0;
1623 
1624 	if (waitq_held(waitq)) {
1625 		return;
1626 	}
1627 
1628 	struct turnstile *turnstile = waitq_to_turnstile(waitq);
1629 	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
1630 	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
1631 }
1632 
1633 static_assert(SWI_COND_OWNER_BITS == CTID_SIZE_BIT);
1634 static_assert(sizeof(cond_swi_var32_s) == sizeof(uint32_t));
1635 static_assert(sizeof(cond_swi_var64_s) == sizeof(uint64_t));
1636 
/*
 * Routine: cond_sleep_with_inheritor_and_turnstile_type
 *
 * Common implementation for the cond_sleep_with_inheritor* variants:
 * evaluates cond_sleep_check under the turnstile hash bucket interlock;
 * if the check passes, the thread sleeps on `cond` pushing on the thread
 * identified by the ctid the check reports; otherwise it returns
 * THREAD_NOT_WAITING without sleeping.
 */
static wait_result_t
cond_sleep_with_inheritor_and_turnstile_type(
	cond_swi_var_t cond,
	bool (^cond_sleep_check)(ctid_t*),
	wait_interrupt_t interruptible,
	uint64_t deadline,
	turnstile_type_t type)
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;
	ctid_t ctid = 0;
	thread_t inheritor;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before checking the sleep condition
	 */
	turnstile_hash_bucket_lock((uintptr_t)cond, &index, type);

	/*
	 * In case the sleep check succeeds, the block will
	 * provide us the ctid observed on the variable.
	 */
	if (!cond_sleep_check(&ctid)) {
		turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
		return THREAD_NOT_WAITING;
	}

	/*
	 * We can translate the ctid to a thread_t only
	 * if cond_sleep_check succeeded.
	 */
	inheritor = ctid_get_thread(ctid);
	assert(inheritor != NULL);

	ts = turnstile_prepare_hash((uintptr_t)cond, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(cond), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	/* pairs with turnstile_prepare_hash above */
	turnstile_complete_hash((uintptr_t)cond, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();
	return ret;
}
1703 
1704 /*
1705  * Name: cond_sleep_with_inheritor32_mask
1706  *
1707  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1708  *              Allows a thread to conditionally sleep while indicating which thread should
1709  *              inherit the priority push associated with the condition.
1710  *              The condition should be expressed through a cond_swi_var32_s pointer.
1711  *              The condition needs to be populated by the caller with the ctid of the
1712  *              thread that should inherit the push. The remaining bits of the condition
1713  *              can be used by the caller to implement its own synchronization logic.
1714  *              A copy of the condition value observed by the caller when it decided to call
1715  *              this function should be provided to prevent races with matching wakeups.
1716  *              This function will atomically check the value stored in the condition against
1717  *              the expected/observed one provided only for the bits that are set in the mask.
1718  *              If the check doesn't pass the thread will not sleep and the function will return.
1719  *              The ctid provided in the condition will be used only after a successful
1720  *              check.
1721  *
1722  * Args:
1723  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1724  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1725  *   Arg3: mask to apply to the condition to check.
1726  *   Arg4: interruptible flag for wait.
1727  *   Arg5: deadline for wait.
1728  *
1729  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1730  *             wakeup for the cond is called.
1731  *
1732  * Returns: result of the wait.
1733  */
1734 static wait_result_t
cond_sleep_with_inheritor32_mask(cond_swi_var_t cond,cond_swi_var32_s expected_cond,uint32_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1735 cond_sleep_with_inheritor32_mask(cond_swi_var_t cond, cond_swi_var32_s expected_cond, uint32_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1736 {
1737 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1738 		cond_swi_var32_s cond_val = {.cond32_data = os_atomic_load((uint32_t*) cond, relaxed)};
1739 		bool ret;
1740 		if ((cond_val.cond32_data & check_mask) == (expected_cond.cond32_data & check_mask)) {
1741 			ret = true;
1742 			*ctid = cond_val.cond32_owner;
1743 		} else {
1744 			ret = false;
1745 		}
1746 		return ret;
1747 	};
1748 
1749 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1750 }
1751 
1752 /*
1753  * Name: cond_sleep_with_inheritor64_mask
1754  *
1755  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1756  *              Allows a thread to conditionally sleep while indicating which thread should
1757  *              inherit the priority push associated with the condition.
1758  *              The condition should be expressed through a cond_swi_var64_s pointer.
1759  *              The condition needs to be populated by the caller with the ctid of the
1760  *              thread that should inherit the push. The remaining bits of the condition
1761  *              can be used by the caller to implement its own synchronization logic.
1762  *              A copy of the condition value observed by the caller when it decided to call
1763  *              this function should be provided to prevent races with matching wakeups.
1764  *              This function will atomically check the value stored in the condition against
1765  *              the expected/observed one provided only for the bits that are set in the mask.
1766  *              If the check doesn't pass the thread will not sleep and the function will return.
1767  *              The ctid provided in the condition will be used only after a successful
1768  *              check.
1769  *
1770  * Args:
1771  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1772  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1773  *   Arg3: mask to apply to the condition to check.
1774  *   Arg4: interruptible flag for wait.
1775  *   Arg5: deadline for wait.
1776  *
1777  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1778  *             wakeup for the cond is called.
1779  *
1780  * Returns: result of the wait.
1781  */
1782 wait_result_t
cond_sleep_with_inheritor64_mask(cond_swi_var_t cond,cond_swi_var64_s expected_cond,uint64_t check_mask,wait_interrupt_t interruptible,uint64_t deadline)1783 cond_sleep_with_inheritor64_mask(cond_swi_var_t cond, cond_swi_var64_s expected_cond, uint64_t check_mask, wait_interrupt_t interruptible, uint64_t deadline)
1784 {
1785 	bool (^cond_sleep_check)(uint32_t*) = ^(ctid_t *ctid) {
1786 		cond_swi_var64_s cond_val = {.cond64_data = os_atomic_load((uint64_t*) cond, relaxed)};
1787 		bool ret;
1788 		if ((cond_val.cond64_data & check_mask) == (expected_cond.cond64_data & check_mask)) {
1789 			ret = true;
1790 			*ctid = cond_val.cond64_owner;
1791 		} else {
1792 			ret = false;
1793 		}
1794 		return ret;
1795 	};
1796 
1797 	return cond_sleep_with_inheritor_and_turnstile_type(cond, cond_sleep_check, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1798 }
1799 
1800 /*
1801  * Name: cond_sleep_with_inheritor32
1802  *
1803  * Description: Conditionally sleeps with inheritor, with condition variable of 32bits.
1804  *              Allows a thread to conditionally sleep while indicating which thread should
1805  *              inherit the priority push associated with the condition.
1806  *              The condition should be expressed through a cond_swi_var32_s pointer.
1807  *              The condition needs to be populated by the caller with the ctid of the
1808  *              thread that should inherit the push. The remaining bits of the condition
1809  *              can be used by the caller to implement its own synchronization logic.
1810  *              A copy of the condition value observed by the caller when it decided to call
1811  *              this function should be provided to prevent races with matching wakeups.
1812  *              This function will atomically check the value stored in the condition against
1813  *              the expected/observed one provided. If the check doesn't pass the thread will not
1814  *              sleep and the function will return.
1815  *              The ctid provided in the condition will be used only after a successful
1816  *              check.
1817  *
1818  * Args:
1819  *   Arg1: cond_swi_var32_s pointer that stores the condition to check.
1820  *   Arg2: cond_swi_var32_s observed value to check for conditionally sleep.
1821  *   Arg3: interruptible flag for wait.
1822  *   Arg4: deadline for wait.
1823  *
1824  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1825  *             wakeup for the cond is called.
1826  *
1827  * Returns: result of the wait.
1828  */
1829 wait_result_t
cond_sleep_with_inheritor32(cond_swi_var_t cond,cond_swi_var32_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1830 cond_sleep_with_inheritor32(cond_swi_var_t cond, cond_swi_var32_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1831 {
1832 	return cond_sleep_with_inheritor32_mask(cond, expected_cond, ~0u, interruptible, deadline);
1833 }
1834 
1835 /*
1836  * Name: cond_sleep_with_inheritor64
1837  *
1838  * Description: Conditionally sleeps with inheritor, with condition variable of 64bits.
1839  *              Allows a thread to conditionally sleep while indicating which thread should
1840  *              inherit the priority push associated with the condition.
1841  *              The condition should be expressed through a cond_swi_var64_s pointer.
1842  *              The condition needs to be populated by the caller with the ctid of the
1843  *              thread that should inherit the push. The remaining bits of the condition
1844  *              can be used by the caller to implement its own synchronization logic.
1845  *              A copy of the condition value observed by the caller when it decided to call
1846  *              this function should be provided to prevent races with matching wakeups.
1847  *              This function will atomically check the value stored in the condition against
1848  *              the expected/observed one provided. If the check doesn't pass the thread will not
1849  *              sleep and the function will return.
1850  *              The ctid provided in the condition will be used only after a successful
1851  *              check.
1852  *
1853  * Args:
1854  *   Arg1: cond_swi_var64_s pointer that stores the condition to check.
1855  *   Arg2: cond_swi_var64_s observed value to check for conditionally sleep.
1856  *   Arg3: interruptible flag for wait.
1857  *   Arg4: deadline for wait.
1858  *
1859  * Conditions: The inheritor specified cannot return to user space or exit until another inheritor is specified for the cond or a
1860  *             wakeup for the cond is called.
1861  *
1862  * Returns: result of the wait.
1863  */
1864 wait_result_t
cond_sleep_with_inheritor64(cond_swi_var_t cond,cond_swi_var64_s expected_cond,wait_interrupt_t interruptible,uint64_t deadline)1865 cond_sleep_with_inheritor64(cond_swi_var_t cond, cond_swi_var64_s expected_cond, wait_interrupt_t interruptible, uint64_t deadline)
1866 {
1867 	return cond_sleep_with_inheritor64_mask(cond, expected_cond, ~0ull, interruptible, deadline);
1868 }
1869 
1870 /*
1871  * Name: cond_wakeup_one_with_inheritor
1872  *
1873  * Description: Wake up one waiter waiting on the condition (if any).
1874  *              The thread woken up will be the one with the higher sched priority waiting on the condition.
1875  *              The push for the condition will be transferred from the last inheritor to the woken up thread.
1876  *
1877  * Args:
1878  *   Arg1: condition to wake from.
1879  *   Arg2: wait result to pass to the woken up thread.
 *   Arg3: pointer for storing the woken-up thread.
1881  *
1882  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1883  *
 * Conditions: The new inheritor woken up cannot return to user space or exit until another inheritor is specified for the
 *             condition or a wakeup for the event is called.
 *             A reference for the woken-up thread is acquired.
1887  *             NOTE: this cannot be called from interrupt context.
1888  */
1889 kern_return_t
cond_wakeup_one_with_inheritor(cond_swi_var_t cond,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1890 cond_wakeup_one_with_inheritor(cond_swi_var_t cond, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1891 {
1892 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1893 	           result,
1894 	           TRUE,
1895 	           action,
1896 	           thread_wokenup);
1897 }
1898 
1899 /*
1900  * Name: cond_wakeup_all_with_inheritor
1901  *
1902  * Description: Wake up all waiters waiting on the same condition. The old inheritor will lose the push.
1903  *
1904  * Args:
1905  *   Arg1: condition to wake from.
1906  *   Arg2: wait result to pass to the woken up threads.
1907  *
1908  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1909  *
1910  * Conditions: NOTE: this cannot be called from interrupt context.
1911  */
1912 kern_return_t
cond_wakeup_all_with_inheritor(cond_swi_var_t cond,wait_result_t result)1913 cond_wakeup_all_with_inheritor(cond_swi_var_t cond, wait_result_t result)
1914 {
1915 	return wakeup_with_inheritor_and_turnstile((event_t)cond,
1916 	           result,
1917 	           FALSE,
1918 	           0,
1919 	           NULL);
1920 }
1921 
1922 
#pragma mark - gates

/* Value stored in gt_type to recognize an initialized gate. */
#define GATE_TYPE        3
/* Bit positions inside gt_data used for the interlock and the waiters flag. */
#define GATE_ILOCK_BIT   0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Spin interlock implemented on the low bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/*
 * The bits of gt_data above the two flag bits hold the holder thread_t
 * (assumes thread pointers have their two low bits clear — TODO confirm
 * via thread_t alignment).
 */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Poison pattern stored in gt_data once the gate has been destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate)     ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred-cleanup callback type returned by the *_turnstile helpers. */
typedef void (*void_func_void)(void);
1949 
/* Panic: gt_type does not contain GATE_TYPE, so this is not a valid gate. */
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1956 
/* Panic: the gate carries the GATE_DESTROYED poison pattern. */
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1963 
/*
 * Sanity-check a gate before use: valid type tag, not already
 * destroyed, and at least one outstanding reference.
 */
static void
gate_verify(gate_t *gate)
{
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	assert(gate->gt_refs > 0);
}
1976 
/* Panic: attempt to close a gate that already has a holder. */
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
1983 
1984 static kern_return_t
gate_try_close(gate_t * gate)1985 gate_try_close(gate_t *gate)
1986 {
1987 	uintptr_t state;
1988 	thread_t holder;
1989 	kern_return_t ret;
1990 	thread_t thread = current_thread();
1991 
1992 	gate_verify(gate);
1993 
1994 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
1995 		return KERN_SUCCESS;
1996 	}
1997 
1998 	gate_ilock(gate);
1999 	state = ordered_load_gate(gate);
2000 	holder = GATE_STATE_TO_THREAD(state);
2001 
2002 	if (holder == NULL) {
2003 		assert(gate_has_waiter_bit(state) == FALSE);
2004 
2005 		state = GATE_THREAD_TO_STATE(current_thread());
2006 		state |= GATE_ILOCK;
2007 		ordered_store_gate(gate, state);
2008 		ret = KERN_SUCCESS;
2009 	} else {
2010 		if (holder == current_thread()) {
2011 			gate_already_owned_panic(gate, holder);
2012 		}
2013 		ret = KERN_FAILURE;
2014 	}
2015 
2016 	gate_iunlock(gate);
2017 	return ret;
2018 }
2019 
/*
 * Close the gate: the current thread becomes the holder.
 * Panics if the gate is already closed (by any thread).
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: gate fully open (no holder, no waiters, interlock free). */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* An open gate cannot have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	/* Record the new holder; keep the interlock bit set until gate_iunlock. */
	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2049 
/*
 * Wake all threads waiting on the gate's turnstile and drop the
 * turnstile inheritorship. Called with the gate interlock held.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile,
	    TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2070 
/* Panic: open/handoff attempted by a thread that is not the holder. */
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2081 
/*
 * Open the gate, waking up all waiters (if any).
 * Panics if the current thread is not the holder.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: we are the holder and there are no waiters. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiter bits; only the interlock bit remains set. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2113 
/*
 * Wake the highest priority waiter and make it the new inheritor.
 * Called with the gate interlock held. On KERN_SUCCESS, returns the
 * woken-up thread (with a reference) through 'thread_woken_up' and
 * whether more waiters remain through 'waiters'; KERN_NOT_WAITING
 * if the turnstile had no eligible waiter.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just woken-up thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2177 
2178 static kern_return_t
gate_handoff(gate_t * gate,int flags)2179 gate_handoff(gate_t *gate,
2180     int flags)
2181 {
2182 	kern_return_t ret;
2183 	thread_t new_holder = NULL;
2184 	uintptr_t state;
2185 	thread_t holder;
2186 	bool waiters;
2187 	thread_t thread = current_thread();
2188 
2189 	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2190 	gate_verify(gate);
2191 
2192 	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2193 		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2194 			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
2195 			return KERN_NOT_WAITING;
2196 		}
2197 	}
2198 
2199 	gate_ilock(gate);
2200 	state = ordered_load_gate(gate);
2201 	holder = GATE_STATE_TO_THREAD(state);
2202 	waiters = gate_has_waiter_bit(state);
2203 
2204 	if (holder != current_thread()) {
2205 		gate_not_owned_panic(gate, holder, false);
2206 	}
2207 
2208 	if (waiters) {
2209 		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2210 		if (ret == KERN_SUCCESS) {
2211 			state = GATE_THREAD_TO_STATE(new_holder);
2212 			if (waiters) {
2213 				state |= GATE_WAITERS;
2214 			}
2215 		} else {
2216 			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2217 				state = 0;
2218 			}
2219 		}
2220 	} else {
2221 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2222 			state = 0;
2223 		}
2224 		ret = KERN_NOT_WAITING;
2225 	}
2226 	state |= GATE_ILOCK;
2227 	ordered_store_gate(gate, state);
2228 
2229 	gate_iunlock(gate);
2230 
2231 	if (new_holder) {
2232 		thread_deallocate(new_holder);
2233 	}
2234 	return ret;
2235 }
2236 
/*
 * Move the turnstile inheritorship to 'new_inheritor' (the stealing
 * thread). Called with the gate interlock held; returns the
 * turnstile_cleanup function to be run after the interlock is dropped.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2255 
/* Panic: wait/steal attempted on a gate that has no holder. */
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2266 
/*
 * Steal the gate from its current holder: the current thread becomes
 * the new holder (and, if there are waiters, the new turnstile
 * inheritor). Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep the waiter bit and redirect the push to us. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		/* Turnstile cleanup must run without the interlock held. */
		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2301 
/*
 * Block on the gate's turnstile, pushing on 'holder'. Entered and
 * exited with the gate interlock held; the interlock is dropped across
 * the thread_block. Reports the wait result through 'wait' and whether
 * the turnstile still has waiters through 'waiters'. Returns the
 * turnstile_cleanup function to be run after the interlock is dropped.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
		    holder == current_thread())) {     // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2355 
/* Return the gate's memory to its zone (gates allocated by gate_alloc_init). */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2361 
/* Panic: gt_refs would overflow UINT16_MAX. */
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2368 
/*
 * Sleep on a closed gate, temporarily dropping the primitive lock that
 * protects it.
 *
 * 'primitive_unlock' is called before blocking and 'primitive_lock' is
 * called again before returning. A reference is taken on the gate for
 * the duration of the sleep so a concurrent gate_free() cannot free it
 * from under us; the last waiter frees a destroyed, zone-allocated gate.
 *
 * Returns GATE_HANDOFF if the gate was handed off to this thread,
 * GATE_OPENED if it was opened (or destroyed by gate_free),
 * GATE_INTERRUPTED / GATE_TIMED_OUT on an interrupted or expired wait.
 * Panics if the gate is not closed when called.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	/* Back from the sleep with the gate interlock held. */
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Refresh the waiter bit from what the turnstile reported. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken before the sleep; snapshot what we need for after unlock. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		/* The gate was destroyed while we slept. */
		if (to_free == true) {
			assert(!waiters);
			/* Last waiter out frees the zone-allocated gate. */
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2492 
/*
 * Assert a property of the gate's state under the interlock:
 * GATE_ASSERT_CLOSED (some holder), GATE_ASSERT_OPEN (no holder),
 * or GATE_ASSERT_HELD (held by the current thread).
 */
static void
gate_assert(gate_t *gate, int flags)
{
	uintptr_t state;
	thread_t holder;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (flags) {
	case GATE_ASSERT_CLOSED:
		assert(holder != NULL);
		break;
	case GATE_ASSERT_OPEN:
		assert(holder == NULL);
		break;
	case GATE_ASSERT_HELD:
		assert(holder == current_thread());
		break;
	default:
		panic("invalid %s flag %d", __func__, flags);
	}

	gate_iunlock(gate);
}
2521 
/* gate_init() type: was the gate zone-allocated (gate_alloc_init) or embedded? */
enum {
	GT_INIT_DEFAULT = 0,
	GT_INIT_ALLOC
};
2526 
2527 static void
gate_init(gate_t * gate,uint type)2528 gate_init(gate_t *gate, uint type)
2529 {
2530 	bzero(gate, sizeof(gate_t));
2531 
2532 	gate->gt_data = 0;
2533 	gate->gt_turnstile = NULL;
2534 	gate->gt_refs = 1;
2535 	switch (type) {
2536 	case GT_INIT_ALLOC:
2537 		gate->gt_alloc = 1;
2538 		break;
2539 	default:
2540 		gate->gt_alloc = 0;
2541 		break;
2542 	}
2543 	gate->gt_type = GATE_TYPE;
2544 	gate->gt_flags_pad = 0;
2545 }
2546 
2547 static gate_t*
gate_alloc_init(void)2548 gate_alloc_init(void)
2549 {
2550 	gate_t *gate;
2551 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2552 	gate_init(gate, GT_INIT_ALLOC);
2553 	return gate;
2554 }
2555 
/* Panic: destroy attempted while the gate still has a holder. */
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2562 
/* Panic: destroy attempted while the gate still has waiters. */
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2569 
/*
 * Mark the gate destroyed and drop the caller's reference.
 * The gate must be open and waiter-free. Returns the remaining
 * reference count, so the caller knows whether woken-up threads
 * are still on their way out of gate_wait().
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still needs
	 * to be available to let the
	 * last woken-up threads clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2610 
/* Panic: gate_destroy() called on a zone-allocated gate (use gate_free). */
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2617 
2618 static void
gate_destroy(gate_t * gate)2619 gate_destroy(gate_t *gate)
2620 {
2621 	gate_verify(gate);
2622 	if (gate->gt_alloc == 1) {
2623 		gate_destroy_panic(gate);
2624 	}
2625 	gate_destroy_internal(gate);
2626 }
2627 
/* Panic: gate_free() called on a gate not from gate_alloc_init(). */
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2634 
/*
 * Destroy and (when possible) free a gate allocated by gate_alloc_init().
 * If woken-up waiters still hold references, the last of them frees the
 * gate on its way out of gate_wait().
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2658 
2659 /*
2660  * Name: lck_rw_gate_init
2661  *
2662  * Description: initializes a variable declared with decl_lck_rw_gate_data.
2663  *
2664  * Args:
2665  *   Arg1: lck_rw_t lock used to protect the gate.
2666  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2667  */
2668 void
lck_rw_gate_init(lck_rw_t * lock,gate_t * gate)2669 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2670 {
2671 	(void) lock;
2672 	gate_init(gate, GT_INIT_DEFAULT);
2673 }
2674 
2675 /*
2676  * Name: lck_rw_gate_alloc_init
2677  *
2678  * Description: allocates and initializes a gate_t.
2679  *
2680  * Args:
2681  *   Arg1: lck_rw_t lock used to protect the gate.
2682  *
2683  * Returns:
2684  *         gate_t allocated.
2685  */
2686 gate_t*
lck_rw_gate_alloc_init(lck_rw_t * lock)2687 lck_rw_gate_alloc_init(lck_rw_t *lock)
2688 {
2689 	(void) lock;
2690 	return gate_alloc_init();
2691 }
2692 
2693 /*
2694  * Name: lck_rw_gate_destroy
2695  *
2696  * Description: destroys a variable previously initialized
2697  *              with lck_rw_gate_init().
2698  *
2699  * Args:
2700  *   Arg1: lck_rw_t lock used to protect the gate.
2701  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2702  */
2703 void
lck_rw_gate_destroy(lck_rw_t * lock,gate_t * gate)2704 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2705 {
2706 	(void) lock;
2707 	gate_destroy(gate);
2708 }
2709 
2710 /*
2711  * Name: lck_rw_gate_free
2712  *
2713  * Description: destroys and tries to free a gate previously allocated
2714  *              with lck_rw_gate_alloc_init().
2715  *              The gate free might be delegated to the last thread returning
2716  *              from the gate_wait().
2717  *
2718  * Args:
2719  *   Arg1: lck_rw_t lock used to protect the gate.
2720  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2721  */
2722 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2723 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2724 {
2725 	(void) lock;
2726 	gate_free(gate);
2727 }
2728 
2729 /*
2730  * Name: lck_rw_gate_try_close
2731  *
2732  * Description: Tries to close the gate.
2733  *              In case of success the current thread will be set as
2734  *              the holder of the gate.
2735  *
2736  * Args:
2737  *   Arg1: lck_rw_t lock used to protect the gate.
2738  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2739  *
2740  * Conditions: Lock must be held. Returns with the lock held.
2741  *
2742  * Returns:
2743  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2744  *          of the gate.
2745  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2746  *          to wake up possible waiters on the gate before returning to userspace.
2747  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2748  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2749  *
2750  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2751  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2752  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2753  *          be done without dropping the lock that is protecting the gate in between.
2754  */
2755 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2756 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2757 {
2758 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2759 
2760 	return gate_try_close(gate);
2761 }
2762 
2763 /*
2764  * Name: lck_rw_gate_close
2765  *
2766  * Description: Closes the gate. The current thread will be set as
2767  *              the holder of the gate. Will panic if the gate is already closed.
2768  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2769  *              to wake up possible waiters on the gate before returning to userspace.
2770  *
2771  * Args:
2772  *   Arg1: lck_rw_t lock used to protect the gate.
2773  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2774  *
2775  * Conditions: Lock must be held. Returns with the lock held.
2776  *             The gate must be open.
2777  *
2778  */
2779 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2780 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2781 {
2782 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2783 
2784 	return gate_close(gate);
2785 }
2786 
2787 /*
2788  * Name: lck_rw_gate_open
2789  *
2790  * Description: Opens the gate and wakes up possible waiters.
2791  *
2792  * Args:
2793  *   Arg1: lck_rw_t lock used to protect the gate.
2794  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2795  *
2796  * Conditions: Lock must be held. Returns with the lock held.
2797  *             The current thread must be the holder of the gate.
2798  *
2799  */
2800 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2801 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2802 {
2803 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2804 
2805 	gate_open(gate);
2806 }
2807 
2808 /*
2809  * Name: lck_rw_gate_handoff
2810  *
2811  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2812  *              priority will be selected as the new holder of the gate, and woken up,
2813  *              with the gate remaining in the closed state throughout.
2814  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2815  *              will be returned.
2816  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2817  *              case no waiters were found.
2818  *
2819  *
2820  * Args:
2821  *   Arg1: lck_rw_t lock used to protect the gate.
2822  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2823  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2824  *
2825  * Conditions: Lock must be held. Returns with the lock held.
2826  *             The current thread must be the holder of the gate.
2827  *
2828  * Returns:
2829  *          KERN_SUCCESS in case one of the waiters became the new holder.
2830  *          KERN_NOT_WAITING in case there were no waiters.
2831  *
2832  */
2833 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2834 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2835 {
2836 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2837 
2838 	return gate_handoff(gate, flags);
2839 }
2840 
2841 /*
2842  * Name: lck_rw_gate_steal
2843  *
 * Description: Steals the ownership of the gate. It sets the current thread as the
2845  *              new holder of the gate.
2846  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2847  *              to wake up possible waiters on the gate before returning to userspace.
2848  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2849  *              anymore.
2850  *
2851  *
2852  * Args:
2853  *   Arg1: lck_rw_t lock used to protect the gate.
2854  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2855  *
2856  * Conditions: Lock must be held. Returns with the lock held.
2857  *             The gate must be closed and the current thread must not already be the holder.
2858  *
2859  */
2860 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2861 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2862 {
2863 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2864 
2865 	gate_steal(gate);
2866 }
2867 
2868 /*
2869  * Name: lck_rw_gate_wait
2870  *
2871  * Description: Waits for the current thread to become the holder of the gate or for the
2872  *              gate to become open. An interruptible mode and deadline can be specified
2873  *              to return earlier from the wait.
2874  *
2875  * Args:
2876  *   Arg1: lck_rw_t lock used to protect the gate.
2877  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2878  *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
2879  *   Arg3: interruptible flag for wait.
2880  *   Arg4: deadline
2881  *
2882  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2883  *             Lock will be dropped while waiting.
2884  *             The gate must be closed.
2885  *
2886  * Returns: Reason why the thread was woken up.
2887  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2888  *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
2889  *                         to wake up possible waiters on the gate before returning to userspace.
2890  *          GATE_OPENED - the gate was opened by the holder.
2891  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
2892  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
2893  */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * __block so the first continuation can record which mode (shared vs
	 * exclusive) lck_rw_done() reports the lock was held in, and the
	 * re-lock continuation can restore that same mode.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * Each gate_wait() call passes two Blocks: one run to drop the lock
	 * before sleeping, one run after wakeup to re-take it (or not) per
	 * the requested sleep action.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* Default: re-acquire in whatever mode the lock was held before. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		/* Caller asked to come back holding the lock exclusive. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		/* Remaining case: LCK_SLEEP_SHARED — come back holding it shared. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2927 
2928 /*
2929  * Name: lck_rw_gate_assert
2930  *
2931  * Description: asserts that the gate is in the specified state.
2932  *
2933  * Args:
2934  *   Arg1: lck_rw_t lock used to protect the gate.
2935  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2936  *   Arg3: flags to specified assert type.
2937  *         GATE_ASSERT_CLOSED - the gate is currently closed
2938  *         GATE_ASSERT_OPEN - the gate is currently opened
2939  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2940  */
2941 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2942 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2943 {
2944 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2945 
2946 	gate_assert(gate, flags);
2947 	return;
2948 }
2949 
2950 /*
2951  * Name: lck_mtx_gate_init
2952  *
2953  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2954  *
2955  * Args:
2956  *   Arg1: lck_mtx_t lock used to protect the gate.
2957  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2958  */
2959 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2960 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2961 {
2962 	(void) lock;
2963 	gate_init(gate, GT_INIT_DEFAULT);
2964 }
2965 
2966 /*
2967  * Name: lck_mtx_gate_alloc_init
2968  *
2969  * Description: allocates and initializes a gate_t.
2970  *
2971  * Args:
2972  *   Arg1: lck_mtx_t lock used to protect the gate.
2973  *
2974  * Returns:
2975  *         gate_t allocated.
2976  */
2977 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)2978 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
2979 {
2980 	(void) lock;
2981 	return gate_alloc_init();
2982 }
2983 
2984 /*
2985  * Name: lck_mtx_gate_destroy
2986  *
2987  * Description: destroys a variable previously initialized
2988  *              with lck_mtx_gate_init().
2989  *
2990  * Args:
2991  *   Arg1: lck_mtx_t lock used to protect the gate.
2992  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2993  */
2994 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)2995 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
2996 {
2997 	(void) lock;
2998 	gate_destroy(gate);
2999 }
3000 
3001 /*
3002  * Name: lck_mtx_gate_free
3003  *
3004  * Description: destroys and tries to free a gate previously allocated
3005  *	        with lck_mtx_gate_alloc_init().
3006  *              The gate free might be delegated to the last thread returning
3007  *              from the gate_wait().
3008  *
3009  * Args:
3010  *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3012  */
3013 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3014 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3015 {
3016 	(void) lock;
3017 	gate_free(gate);
3018 }
3019 
3020 /*
3021  * Name: lck_mtx_gate_try_close
3022  *
3023  * Description: Tries to close the gate.
3024  *              In case of success the current thread will be set as
3025  *              the holder of the gate.
3026  *
3027  * Args:
3028  *   Arg1: lck_mtx_t lock used to protect the gate.
3029  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3030  *
3031  * Conditions: Lock must be held. Returns with the lock held.
3032  *
3033  * Returns:
3034  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3035  *          of the gate.
3036  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3037  *          to wake up possible waiters on the gate before returning to userspace.
3038  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3039  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3040  *
3041  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3042  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3043  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3044  *          be done without dropping the lock that is protecting the gate in between.
3045  */
3046 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3047 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3048 {
3049 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3050 
3051 	return gate_try_close(gate);
3052 }
3053 
3054 /*
3055  * Name: lck_mtx_gate_close
3056  *
3057  * Description: Closes the gate. The current thread will be set as
3058  *              the holder of the gate. Will panic if the gate is already closed.
3059  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3060  *              to wake up possible waiters on the gate before returning to userspace.
3061  *
3062  * Args:
3063  *   Arg1: lck_mtx_t lock used to protect the gate.
3064  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3065  *
3066  * Conditions: Lock must be held. Returns with the lock held.
3067  *             The gate must be open.
3068  *
3069  */
3070 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3071 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3072 {
3073 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3074 
3075 	return gate_close(gate);
3076 }
3077 
3078 /*
3079  * Name: lck_mtx_gate_open
3080  *
 * Description: Opens the gate and wakes up possible waiters.
3082  *
3083  * Args:
3084  *   Arg1: lck_mtx_t lock used to protect the gate.
3085  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3086  *
3087  * Conditions: Lock must be held. Returns with the lock held.
3088  *             The current thread must be the holder of the gate.
3089  *
3090  */
3091 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3092 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3093 {
3094 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3095 
3096 	gate_open(gate);
3097 }
3098 
3099 /*
3100  * Name: lck_mtx_gate_handoff
3101  *
3102  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3103  *              priority will be selected as the new holder of the gate, and woken up,
3104  *              with the gate remaining in the closed state throughout.
3105  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3106  *              will be returned.
3107  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3108  *              case no waiters were found.
3109  *
3110  *
3111  * Args:
3112  *   Arg1: lck_mtx_t lock used to protect the gate.
3113  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3114  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3115  *
3116  * Conditions: Lock must be held. Returns with the lock held.
3117  *             The current thread must be the holder of the gate.
3118  *
3119  * Returns:
3120  *          KERN_SUCCESS in case one of the waiters became the new holder.
3121  *          KERN_NOT_WAITING in case there were no waiters.
3122  *
3123  */
3124 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3125 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3126 {
3127 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3128 
3129 	return gate_handoff(gate, flags);
3130 }
3131 
3132 /*
3133  * Name: lck_mtx_gate_steal
3134  *
3135  * Description: Steals the ownership of the gate. It sets the current thread as the
3136  *              new holder of the gate.
3137  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3138  *              to wake up possible waiters on the gate before returning to userspace.
3139  *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3140  *              anymore.
3141  *
3142  *
3143  * Args:
3144  *   Arg1: lck_mtx_t lock used to protect the gate.
3145  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3146  *
3147  * Conditions: Lock must be held. Returns with the lock held.
3148  *             The gate must be closed and the current thread must not already be the holder.
3149  *
3150  */
3151 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3152 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3153 {
3154 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3155 
3156 	gate_steal(gate);
3157 }
3158 
3159 /*
3160  * Name: lck_mtx_gate_wait
3161  *
3162  * Description: Waits for the current thread to become the holder of the gate or for the
3163  *              gate to become open. An interruptible mode and deadline can be specified
3164  *              to return earlier from the wait.
3165  *
3166  * Args:
3167  *   Arg1: lck_mtx_t lock used to protect the gate.
3168  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3169  *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3170  *   Arg3: interruptible flag for wait.
3171  *   Arg4: deadline
3172  *
3173  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3174  *             Lock will be dropped while waiting.
3175  *             The gate must be closed.
3176  *
3177  * Returns: Reason why the thread was woken up.
3178  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3179  *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3180  *                         to wake up possible waiters on the gate before returning to userspace.
3181  *          GATE_OPENED - the gate was opened by the holder.
3182  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3183  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3184  */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Each gate_wait() call passes two Blocks: one run to drop the mutex
	 * before sleeping, one run after wakeup to re-take it (or not) per
	 * the requested sleep action.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		/* Re-acquire in spin mode after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		/* Re-acquire in spin-always mode after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		/* Default: re-acquire as a regular mutex after wakeup. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3216 
3217 /*
3218  * Name: lck_mtx_gate_assert
3219  *
3220  * Description: asserts that the gate is in the specified state.
3221  *
3222  * Args:
3223  *   Arg1: lck_mtx_t lock used to protect the gate.
3224  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3225  *   Arg3: flags to specified assert type.
3226  *         GATE_ASSERT_CLOSED - the gate is currently closed
3227  *         GATE_ASSERT_OPEN - the gate is currently opened
3228  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3229  */
3230 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3231 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3232 {
3233 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3234 
3235 	gate_assert(gate, flags);
3236 }
3237 
3238 #pragma mark - LCK_*_DECLARE support
3239 
3240 __startup_func
3241 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3242 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3243 {
3244 	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3245 }
3246 
3247 __startup_func
3248 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3249 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3250 {
3251 	lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3252 }
3253 
3254 __startup_func
3255 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3256 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3257 {
3258 	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3259 }
3260 
3261 __startup_func
3262 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3263 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3264 {
3265 	simple_lock_init(sp->lck, sp->lck_init_arg);
3266 }
3267 
3268 __startup_func
3269 void
lck_ticket_startup_init(struct lck_ticket_startup_spec * sp)3270 lck_ticket_startup_init(struct lck_ticket_startup_spec *sp)
3271 {
3272 	lck_ticket_init(sp->lck, sp->lck_grp);
3273 }
3274