xref: /xnu-8020.140.41/osfmk/kern/locks.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #define LOCK_PRIVATE 1
58 
59 #include <mach_ldebug.h>
60 #include <debug.h>
61 
62 #include <mach/kern_return.h>
63 
64 #include <kern/lock_stat.h>
65 #include <kern/locks.h>
66 #include <kern/misc_protos.h>
67 #include <kern/zalloc.h>
68 #include <kern/thread.h>
69 #include <kern/processor.h>
70 #include <kern/sched_prim.h>
71 #include <kern/debug.h>
72 #include <libkern/section_keywords.h>
73 #include <machine/atomic.h>
74 #include <machine/machine_cpu.h>
75 #include <string.h>
76 #include <vm/pmap.h>
77 
78 #include <sys/kdebug.h>
79 
/* kdebug code points emitted by the lck_mtx sleep/wait/wakeup tracepoints */
#define LCK_MTX_SLEEP_CODE              0
#define LCK_MTX_SLEEP_DEADLINE_CODE     1
#define LCK_MTX_LCK_WAIT_CODE           2
#define LCK_MTX_UNLCK_WAKEUP_CODE       3

#if MACH_LDEBUG
/* Trap if pointer p is not naturally aligned for type t (lock-debug builds only). */
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else
#define ALIGN_TEST(p, t) do{}while(0)
#endif

#define NOINLINE                __attribute__((noinline))

/* Raw load/store of a hw lock word; compiler barrier only, no hardware ordering. */
#define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)

KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);

/*
 * Per-CPU scratch record describing an in-progress spinlock timeout (filled
 * in by lck_spinlock_timeout_hit()); the global points at the record of the
 * CPU currently reporting a timeout.
 */
struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;

/* When TRUE (the default), a spinlock timeout panics instead of spinning forever. */
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

/* "lcks" boot-arg: lock options; lock debugging is enabled by default on DEBUG builds. */
#if DEBUG
TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
#else
TUNABLE(uint32_t, LcksOpts, "lcks", 0);
#endif

#if CONFIG_DTRACE
#if defined (__x86_64__)
/* Spin duration above which dtrace lockstat spin events are recorded. */
uint32_t _Atomic dtrace_spin_threshold = 500; // 500ns
#define lock_enable_preemption enable_preemption
#elif defined(__arm__) || defined(__arm64__)
MACHINE_TIMEOUT32(dtrace_spin_threshold, "dtrace-spin-threshold",
    0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
#endif
#endif
118 
119 uintptr_t
unslide_for_kdebug(void * object)120 unslide_for_kdebug(void* object)
121 {
122 	if (__improbable(kdebug_enable)) {
123 		return VM_KERNEL_UNSLIDE_OR_PERM(object);
124 	} else {
125 		return 0;
126 	}
127 }
128 
/*
 * Out-of-line panic for __lck_require_preemption_disabled(); keeping the
 * cold path in its own __abortlike function keeps callers' fast paths small.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
134 
/*
 * Assert that the calling thread runs with preemption disabled before it
 * touches a "nopreempt" lock variant; panics otherwise.
 */
static inline void
__lck_require_preemption_disabled(void *lock, thread_t self __unused)
{
	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
		__lck_require_preemption_disabled_panic(lock);
	}
}
142 
/*
 * Routine:	hw_lock_init
 *
 *	Initialize a hardware lock to the released (zero) state.
 */
MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
	ordered_store_hw(lock, 0);
}
153 
/*
 * Single acquisition attempt on a hw spinlock, storing `newval`
 * (owner | flag bits) on success.  Returns true iff the lock was taken.
 *
 * On LL/SC targets the failure path runs wait_for_event() while the
 * exclusive monitor is armed, so the CPU sleeps until the owner's release
 * store wakes it.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
		        wait_for_event(); // clears the monitor so we don't need give_up()
		        return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	/* load-exclusive first so a held lock can be slept on via the monitor */
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#elif LOCK_PRETEST
	/* cheap read to avoid a doomed cmpxchg when the lock is visibly held */
	if (ordered_load_hw(lock) != 0) {
		return false;
	}
#endif
	return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
182 
/*
 * Compute the effective spin timeout for a lock acquisition.
 *
 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel.
 * A zero in_timeout means "use default_timeout".  In that case, on
 * INTERRUPT_MASKED_DEBUG (non-KASAN) kernels running outside the PPL in
 * panic-hygiene mode with interrupts masked, the timeout is clamped to the
 * interrupt-masked timeout so the spin panic fires first.
 * A return of 0 means "no timeout".
 */
__attribute__((always_inline))
uint64_t
#if INTERRUPT_MASKED_DEBUG
hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout, __unused bool in_ppl, __unused bool interruptible)
#else
hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout)
#endif /* INTERRUPT_MASKED_DEBUG */
{
	uint64_t timeout = in_timeout;
	if (timeout == 0) {
		timeout = default_timeout;
#if INTERRUPT_MASKED_DEBUG
#ifndef KASAN
		if (timeout > 0 && !in_ppl) {
			if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC && !interruptible) {
				/* clamp so the spinlock panic beats the hygiene panic */
				uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
				if (int_timeout < timeout) {
					timeout = int_timeout;
				}
			}
		}
#endif /* !KASAN */
#endif /* INTERRUPT_MASKED_DEBUG */
	}

	return timeout;
}
213 
/*
 * Record (DEBUG/DEVELOPMENT kernels only) the lock owner observed when a
 * contended spin started, so a later timeout panic can report both the
 * initial and the current owner.  The low 3 bits (lock state flags) are
 * stripped; a no-op elsewhere.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
224 
/*
 * Called when a spinlock acquisition has timed out, right before panicking.
 *
 * Fills in this CPU's lck_spinlock_to_info record (lock address, apparent
 * owner thread, and -- if found by scanning each CPU's active thread -- the
 * CPU the owner runs on), and publishes it through
 * lck_spinlock_timeout_in_progress with release ordering so the panic flow
 * sees a complete record.  On x86, the owner's CPU is sent an NMI so its
 * state is captured in the panic.
 *
 * Returns the filled-in record for use in the panic message.
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	/* strip possible bits used by the lock implementations */
	owner &= ~0x7ul;

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;  /* ~0u == owner CPU unknown */
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			/* re-publish now that owner_cpu is known */
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
261 
/*
 * Single attempt to acquire bit `bit` of *target as a bit spinlock.
 * Returns true iff the bit was clear and has now been set.
 *
 * When the bit is already held: with `wait` true, sleep politely
 * (wfe via wait_for_event() on LL/SC targets, cpu_pause() otherwise)
 * before returning; with `wait` false, back off immediately, releasing
 * the exclusive monitor where one was taken.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
		        if (wait) {
		                wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
		                os_atomic_clear_exclusive();
			}
#else
		        if (wait) {
		                cpu_pause();
			}
#endif
		        return false;
		}
	});
	return true;
#else
	/* load-exclusive first so a held bit can be slept on via the monitor */
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* success only if we were the one to flip the bit */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
301 
/*
 * Default timeout handler for hw spinlocks: panic with diagnostics, unless
 * spinlock_timeout_panic has been cleared (then keep spinning).
 *
 * The PPL path panics with a reduced message because lck_spinlock_timeout_hit()
 * writes to per-CPU/global state, which PPL code must not do.
 */
static hw_lock_timeout_status_t
hw_lock_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
{
#pragma unused(interrupt_time)

	hw_lock_t lock  = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul; /* strip lock state flag bits */
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] timeout after %llu ticks; "
		    "current owner: %p, "
		    "start time: %llu, now: %llu, timeout: %llu",
		    lock, now - start, (void *)owner,
		    start, now, timeout);
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] timeout after %llu ticks; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
#if INTERRUPT_MASKED_DEBUG
	    "interrupt time: %llu, "
#endif /* INTERRUPT_MASKED_DEBUG */
	    "start time: %llu, now: %llu, timeout: %llu",
	    lock, now - start,
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
#if INTERRUPT_MASKED_DEBUG
	    interrupt_time,
#endif /* INTERRUPT_MASKED_DEBUG */
	    start, now, timeout);
}
349 
/*
 * Default timeout handler for bit spinlocks: panic with the raw lock word
 * (bit locks have no owner to decode), unless spinlock_timeout_panic has
 * been cleared (then keep spinning).
 */
static hw_lock_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
{
#pragma unused(interrupt_time)

	hw_lock_t lock  = _lock;
	uintptr_t state = lock->lock_data;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicing */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] timeout after %llu ticks; "
	    "current state: %p, "
#if INTERRUPT_MASKED_DEBUG
	    "interrupt time: %llu, "
#endif /* INTERRUPT_MASKED_DEBUG */
	    "start time: %llu, now: %llu, timeout: %llu",
	    lock, now - start, (void*) state,
#if INTERRUPT_MASKED_DEBUG
	    interrupt_time,
#endif /* INTERRUPT_MASKED_DEBUG */
	    start, now, timeout);
}
375 
/*
 *	Routine: hw_lock_lock_contended
 *
 *	Spin until lock is acquired or timeout expires.
 *	timeout is in mach_absolute_time ticks. Called with
 *	preemption disabled.
 *
 *	`data` is the value to store on acquisition (owner | flags).
 *	Returns HW_LOCK_ACQUIRED, or HW_LOCK_CONTENDED if the timeout
 *	handler declined to keep spinning.
 */
static hw_lock_status_t NOINLINE
hw_lock_lock_contended(hw_lock_t lock, thread_t thread, uintptr_t data, uint64_t timeout,
    hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
{
#pragma unused(thread)

	uint64_t        end = 0, start = 0, interrupts = 0;
	uint64_t        default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool            has_timeout = true, in_ppl = pmap_in_ppl();
#if INTERRUPT_MASKED_DEBUG
	/* Note we can't check if we are interruptible if in ppl */
	bool            interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t        start_interrupts = 0;
#endif /* INTERRUPT_MASKED_DEBUG */

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	if (!in_ppl) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
	}

#if INTERRUPT_MASKED_DEBUG
	timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(timeout, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		/* timeout of 0 means spin forever */
		has_timeout = false;
	}

	for (;;) {
		/* batch of cheap attempts between (relatively costly) timebase reads */
		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
			if (hw_lock_trylock_contended(lock, data)) {
#if CONFIG_DTRACE || LOCK_STATS
				if (__improbable(stat_enabled)) {
					lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
					    mach_absolute_time() - begin);
				}
				lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return HW_LOCK_ACQUIRED;
			}
		}
		if (has_timeout) {
			uint64_t now = ml_get_timebase();
			if (end == 0) {
				/* first timed pass: establish start and deadline lazily */
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				start = now;
				end = now + timeout;
			} else if (now < end) {
				/* keep spinning */
			} else {
				/* deadline passed: let the handler decide (panic or extend) */
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				if (handler(lock, timeout, start, now, interrupts)) {
					/* push the deadline */
					end += timeout;
				} else {
#if CONFIG_DTRACE || LOCK_STATS
					if (__improbable(stat_enabled)) {
						lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
						    mach_absolute_time() - begin);
					}
					lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
					return HW_LOCK_CONTENDED;
				}
			}
		}
	}
}
473 
/*
 * Spin until *address no longer equals `current`; returns the new value.
 *
 * Panics after the default lock_panic_timeout (as adjusted by
 * hw_lock_compute_timeout(); 0 means spin forever).  On LL/SC targets the
 * spin uses load-exclusive + wfe so the CPU sleeps until the word changes.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	uint32_t v;
	uint64_t end = 0, timeout = 0;
	uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool has_timeout = true;
#if INTERRUPT_MASKED_DEBUG
	thread_t thread = current_thread();
	bool in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t interrupts = 0, start_interrupts = 0;

	timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(0, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		has_timeout = false;
	}

	for (;;) {
		/* batch of cheap polls between (relatively costly) timebase reads */
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if OS_ATOMIC_HAS_LLSC
			v = os_atomic_load_exclusive(address, relaxed);
			if (__probable(v != current)) {
				os_atomic_clear_exclusive();
				return v;
			}
			wait_for_event();
#else
			v = os_atomic_load(address, relaxed);
			if (__probable(v != current)) {
				return v;
			}
#endif // OS_ATOMIC_HAS_LLSC
		}
		if (has_timeout) {
			if (end == 0) {
				/* first timed pass: establish the deadline lazily */
				end = ml_get_timebase() + timeout;
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
			} else if (ml_get_timebase() >= end) {
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
					panic("Wait while equals timeout @ *%p == 0x%x, "
					    "interrupt_time %llu", address, v, interrupts);
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				panic("Wait while equals timeout @ *%p == 0x%x",
				    address, v);
			}
		}
	}
}
535 
/*
 * 64-bit counterpart of hw_wait_while_equals32(): spin until *address no
 * longer equals `current`, returning the new value; panics on timeout.
 */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	uint64_t v;
	uint64_t end = 0, timeout = 0;
	uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool has_timeout = true;

#if INTERRUPT_MASKED_DEBUG
	thread_t thread = current_thread();
	bool in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t interrupts = 0, start_interrupts = 0;

	timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(0, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		has_timeout = false;
	}

	for (;;) {
		/* batch of cheap polls between (relatively costly) timebase reads */
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if OS_ATOMIC_HAS_LLSC
			v = os_atomic_load_exclusive(address, relaxed);
			if (__probable(v != current)) {
				os_atomic_clear_exclusive();
				return v;
			}
			wait_for_event();
#else
			v = os_atomic_load(address, relaxed);
			if (__probable(v != current)) {
				return v;
			}
#endif // OS_ATOMIC_HAS_LLSC
		}
		if (has_timeout) {
			if (end == 0) {
				/* first timed pass: establish the deadline lazily */
				end = ml_get_timebase() + timeout;
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
			} else if (ml_get_timebase() >= end) {
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
					panic("Wait while equals timeout @ *%p == 0x%llx, "
					    "interrupt_time %llu", address, v, interrupts);
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				panic("Wait while equals timeout @ *%p == 0x%llx",
				    address, v);
			}
		}
	}
}
598 
599 __result_use_check
600 static inline hw_lock_status_t
hw_lock_to_internal(hw_lock_t lock,thread_t thread,uint64_t timeout,hw_lock_timeout_handler_t handler LCK_GRP_ARG (lck_grp_t * grp))601 hw_lock_to_internal(hw_lock_t lock, thread_t thread, uint64_t timeout,
602     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
603 {
604 	uintptr_t state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
605 
606 	if (__probable(hw_lock_trylock_contended(lock, state))) {
607 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
608 		return HW_LOCK_ACQUIRED;
609 	}
610 
611 	return hw_lock_lock_contended(lock, thread, state, timeout, handler LCK_GRP_ARG(grp));
612 }
613 
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	/* result ignored: hw_lock_timeout_panic either panics or keeps spinning */
	(void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
}
627 
/*
 *	Routine: hw_lock_lock_nopreempt
 *
 *	Acquire lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (panics otherwise);
 *	preemption state is unchanged.
 */
void
(hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	/* result ignored: hw_lock_timeout_panic either panics or keeps spinning */
	(void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
}
640 
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.
 *	Returns the hw_lock_status_t as an unsigned int
 *	(nonzero == HW_LOCK_ACQUIRED).
 */
unsigned
int
(hw_lock_to)(hw_lock_t lock, uint64_t timeout, hw_lock_timeout_handler_t handler
    LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
}
657 
/*
 *	Routine: hw_lock_to_nopreempt
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, called and return with
 *	preemption disabled (panics if called preemptible).
 */
unsigned
int
(hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout,
    hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
}
674 
675 __result_use_check
676 static inline unsigned int
hw_lock_try_internal(hw_lock_t lock,thread_t thread LCK_GRP_ARG (lck_grp_t * grp))677 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
678 {
679 	int success = 0;
680 
681 #if LOCK_PRETEST
682 	if (__improbable(ordered_load_hw(lock) != 0)) {
683 		return 0;
684 	}
685 #endif  // LOCK_PRETEST
686 
687 	success = os_atomic_cmpxchg(&lock->lock_data, 0,
688 	    LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);
689 
690 	if (success) {
691 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
692 	}
693 	return success;
694 }
695 
/*
 *	Routine: hw_lock_try
 *
 *	Non-blocking acquisition attempt;
 *	returns with preemption disabled on success.
 *	On failure, preemption is restored and 0 is returned.
 */
unsigned
int
(hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	lock_disable_preemption_for_thread(thread);
	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
	if (!success) {
		lock_enable_preemption();
	}
	return success;
}
713 
/*
 *	Routine: hw_lock_try_nopreempt
 *
 *	Non-blocking acquisition attempt; caller must already hold
 *	preemption disabled (panics otherwise).  Preemption state is
 *	unchanged regardless of the outcome.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
722 
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 */
static inline void
hw_lock_unlock_internal(hw_lock_t lock)
{
	/* release store publishes all writes made while the lock was held */
	os_atomic_store(&lock->lock_data, 0, release);
#if __arm__ || __arm64__
	// ARM tests are only for open-source exclusion
	set_event();    /* wake CPUs sleeping in wfe on this lock word */
#endif  // __arm__ || __arm64__
#if     CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}
740 
/* Release the lock and re-enable preemption (pairs with hw_lock_lock()). */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}
747 
/* Release the lock without touching the preemption level
 * (pairs with hw_lock_lock_nopreempt()). */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
753 
/*
 *	Routine hw_lock_held, doesn't change preemption state.
 *	N.B.  Racy, of course: the answer may be stale by the time
 *	the caller looks at it.  Returns nonzero if the lock word is set.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return ordered_load_hw(lock) != 0;
}
763 
/*
 * Slow path for hw_lock_bit_to(): spin in batches of LOCK_SNOOP_SPINS until
 * the bit is acquired or the timeout handler declines to keep spinning.
 * Deadline bookkeeping mirrors hw_lock_lock_contended().
 * Returns HW_LOCK_ACQUIRED or HW_LOCK_CONTENDED.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t *lock,
	uint32_t       bit,
	uint64_t       timeout,
	hw_lock_timeout_handler_t handler
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uint64_t        end = 0, start = 0, interrupts = 0;
	uint64_t        default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool            has_timeout = true;
	hw_lock_status_t rc;
#if INTERRUPT_MASKED_DEBUG
	thread_t        thread = current_thread();
	bool            in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool            interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t        start_interrupts = 0;
#endif /* INTERRUPT_MASKED_DEBUG */

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

#if INTERRUPT_MASKED_DEBUG
	timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(timeout, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		/* timeout of 0 means spin forever */
		has_timeout = false;
	}

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			// Always load-exclusive before wfe
			// This grabs the monitor and wakes up on a release event
			// NOTE(review): relies on bool true/false mapping to
			// HW_LOCK_ACQUIRED/HW_LOCK_CONTENDED (see assert below)
			rc = hw_lock_trylock_bit(lock, bit, true);
			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}
		if (has_timeout) {
			uint64_t now = ml_get_timebase();
			if (end == 0) {
				/* first timed pass: establish start and deadline lazily */
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				start = now;
				end = now + timeout;
			} else if (now < end) {
				/* keep spinning */
			} else {
				/* deadline passed: let the handler decide (panic or extend) */
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				if (handler(lock, timeout, start, now, interrupts)) {
					/* push the deadline */
					end += timeout;
				} else {
					assert(rc == HW_LOCK_CONTENDED);
					break;
				}
			}
		}
	}

end:
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
851 
852 __result_use_check
853 static inline unsigned int
hw_lock_bit_to_internal(hw_lock_bit_t * lock,unsigned int bit,uint64_t timeout,hw_lock_timeout_handler_t handler LCK_GRP_ARG (lck_grp_t * grp))854 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint64_t timeout,
855     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
856 {
857 	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
858 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
859 		return HW_LOCK_ACQUIRED;
860 	}
861 
862 	return (unsigned)hw_lock_bit_to_contended(lock, bit, timeout, handler
863 	           LCK_GRP_ARG(grp));
864 }
865 
/*
 *	Routine: hw_lock_bit_to
 *
 *	Acquire bit lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks (TSC in Intel), return with
 *	preemption disabled.
 *	Returns nonzero (HW_LOCK_ACQUIRED) on success.
 */
unsigned
int
(hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint64_t timeout,
    hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	return hw_lock_bit_to_internal(lock, bit, timeout, handler LCK_GRP_ARG(grp));
}
881 
/*
 *	Routine: hw_lock_bit
 *
 *	Acquire bit lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
(hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	_disable_preemption();
	/* result ignored: hw_lock_bit_timeout_panic either panics or keeps spinning */
	(void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
}
894 
/*
 *	Routine: hw_lock_bit_nopreempt
 *
 *	Acquire bit lock, spinning until it becomes available.
 *	Caller must already have preemption disabled (panics otherwise).
 */
void
(hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
{
	__lck_require_preemption_disabled(lock, current_thread());
	/* result ignored: hw_lock_bit_timeout_panic either panics or keeps spinning */
	(void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
}
906 
907 
908 unsigned
909 int
910 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
911 {
912 	boolean_t success = false;
913 
914 	_disable_preemption();
915 	success = hw_lock_trylock_bit(lock, bit, false);
916 	if (!success) {
917 		lock_enable_preemption();
918 	}
919 
920 	if (success) {
921 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
922 	}
923 
924 	return success;
925 }
926 
/*
 * Clear bit `bit` with release ordering, publishing all writes made while
 * the bit lock was held; wakes wfe waiters on 32-bit arm.
 */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if __arm__
	set_event();
#endif
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
938 
/*
 *	Routine:	hw_unlock_bit
 *
 *		Release spin-lock. The second parameter is the bit number to test and set.
 *		Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	lock_enable_preemption();
}
951 
/* Release a bit spinlock without touching the preemption level;
 * panics if the caller is preemptible. */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
958 
959 /*
960  * Routine:	lck_spin_sleep
961  */
962 wait_result_t
lck_spin_sleep_grp(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,lck_grp_t * grp)963 lck_spin_sleep_grp(
964 	lck_spin_t              *lck,
965 	lck_sleep_action_t      lck_sleep_action,
966 	event_t                 event,
967 	wait_interrupt_t        interruptible,
968 	lck_grp_t               *grp)
969 {
970 	wait_result_t   res;
971 
972 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
973 		panic("Invalid lock sleep action %x", lck_sleep_action);
974 	}
975 
976 	res = assert_wait(event, interruptible);
977 	if (res == THREAD_WAITING) {
978 		lck_spin_unlock(lck);
979 		res = thread_block(THREAD_CONTINUE_NULL);
980 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
981 			lck_spin_lock_grp(lck, grp);
982 		}
983 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
984 		lck_spin_unlock(lck);
985 	}
986 
987 	return res;
988 }
989 
/*
 * Routine:	lck_spin_sleep
 *
 * Sleep on an event, dropping the spin lock across the wait.
 * Identical to lck_spin_sleep_grp() with no lock-group attribution.
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
999 
1000 /*
1001  * Routine:	lck_spin_sleep_deadline
1002  */
1003 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1004 lck_spin_sleep_deadline(
1005 	lck_spin_t              *lck,
1006 	lck_sleep_action_t      lck_sleep_action,
1007 	event_t                 event,
1008 	wait_interrupt_t        interruptible,
1009 	uint64_t                deadline)
1010 {
1011 	wait_result_t   res;
1012 
1013 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1014 		panic("Invalid lock sleep action %x", lck_sleep_action);
1015 	}
1016 
1017 	res = assert_wait_deadline(event, interruptible, deadline);
1018 	if (res == THREAD_WAITING) {
1019 		lck_spin_unlock(lck);
1020 		res = thread_block(THREAD_CONTINUE_NULL);
1021 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1022 			lck_spin_lock(lck);
1023 		}
1024 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1025 		lck_spin_unlock(lck);
1026 	}
1027 
1028 	return res;
1029 }
1030 
/*
 * Routine:	lck_mtx_sleep
 *
 * Wait for an event while temporarily dropping a mutex.  The wait is
 * registered (assert_wait) before the mutex is dropped, so a wakeup in
 * the unlock/block window cannot be lost.
 *
 * lck_sleep_action bits:
 *   LCK_SLEEP_UNLOCK       - leave the mutex dropped on return
 *   LCK_SLEEP_SPIN         - re-take the mutex in spin mode
 *   LCK_SLEEP_SPIN_ALWAYS  - re-take via lck_mtx_lock_spin_always
 *   LCK_SLEEP_PROMOTED_PRI - hold a priority floor across the sleep
 */
wait_result_t
lck_mtx_sleep(
	lck_mtx_t               *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	thread_pri_floor_t      token;	/* only initialized/used under LCK_SLEEP_PROMOTED_PRI */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We get a priority floor
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
				lck_mtx_lock_spin(lck);
			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
				lck_mtx_lock_spin_always(lck);
			} else {
				lck_mtx_lock(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* never blocked: just honor the unlock request */
		lck_mtx_unlock(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
1086 
1087 
1088 /*
1089  * Routine:	lck_mtx_sleep_deadline
1090  */
1091 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1092 lck_mtx_sleep_deadline(
1093 	lck_mtx_t               *lck,
1094 	lck_sleep_action_t      lck_sleep_action,
1095 	event_t                 event,
1096 	wait_interrupt_t        interruptible,
1097 	uint64_t                deadline)
1098 {
1099 	wait_result_t           res;
1100 	thread_pri_floor_t      token;
1101 
1102 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1103 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1104 
1105 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1106 		panic("Invalid lock sleep action %x", lck_sleep_action);
1107 	}
1108 
1109 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1110 		/*
1111 		 * See lck_mtx_sleep().
1112 		 */
1113 		token = thread_priority_floor_start();
1114 	}
1115 
1116 	res = assert_wait_deadline(event, interruptible, deadline);
1117 	if (res == THREAD_WAITING) {
1118 		lck_mtx_unlock(lck);
1119 		res = thread_block(THREAD_CONTINUE_NULL);
1120 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1121 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1122 				lck_mtx_lock_spin(lck);
1123 			} else {
1124 				lck_mtx_lock(lck);
1125 			}
1126 		}
1127 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1128 		lck_mtx_unlock(lck);
1129 	}
1130 
1131 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1132 		thread_priority_floor_end(&token);
1133 	}
1134 
1135 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1136 
1137 	return res;
1138 }
1139 
1140 /*
1141  * Lock Boosting Invariants:
1142  *
1143  * The lock owner is always promoted to the max priority of all its waiters.
1144  * Max priority is capped at MAXPRI_PROMOTE.
1145  *
1146  * The last waiter is not given a promotion when it wakes up or acquires the lock.
1147  * When the last waiter is waking up, a new contender can always come in and
1148  * steal the lock without having to wait for the last waiter to make forward progress.
1149  */
1150 
/*
 * Routine: lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 *
 * Always aggressively sets the owning thread to promoted,
 * even if it's the same or higher priority
 * This prevents it from lowering its own priority while holding a lock
 *
 * TODO: Come up with a more efficient way to handle same-priority promotions
 *      <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
 */
void
lck_mtx_lock_wait(
	lck_mtx_t                       *lck,
	thread_t                        holder,
	struct turnstile                **ts)
{
	thread_t                thread = current_thread();
	lck_mtx_t               *mutex = lck;
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);

#if     CONFIG_DTRACE
	uint64_t                sleep_start = 0;

	/* only pay for a timestamp if a blocking lockstat probe is armed */
	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

#if LOCKS_INDIRECT_ALLOW
	/* indirect mutexes keep the real lock state behind lck_mtx_ptr */
	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
		mutex = &lck->lck_mtx_ptr->lck_mtx;
	}
#endif /* LOCKS_INDIRECT_ALLOW */

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);

	/* protected by the interlock we were entered with */
	mutex->lck_mtx_waiters++;

	if (*ts == NULL) {
		*ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	}

	struct turnstile *turnstile = *ts;
	thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
	/* direct our push at the current lock holder; completed after the ILK drops */
	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(mutex);

	/* finish the delayed inheritor update now that the interlock is dropped */
	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if     CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
#if LOCKS_INDIRECT_ALLOW
		if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else
#endif /* LOCKS_INDIRECT_ALLOW */
		{
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
1231 
/*
 * Routine:     lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t               *lck,
	struct turnstile        *ts)
{
	thread_t                thread = current_thread();
	lck_mtx_t               *mutex = lck;

#if LOCKS_INDIRECT_ALLOW
	/* indirect mutexes keep the real lock state behind lck_mtx_ptr */
	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
		mutex = &lck->lck_mtx_ptr->lck_mtx;
	}
#endif /* LOCKS_INDIRECT_ALLOW */

	if (mutex->lck_mtx_waiters > 0) {
		if (ts == NULL) {
			ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
		}

		/* the remaining waiters now push on us, the new owner */
		turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	}

	if (ts != NULL) {
		turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
	}

	return mutex->lck_mtx_waiters;
}
1271 
/*
 * Routine:     lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 *
 * Returns TRUE if waiters remain after the wakeup.
 *
 * NOTE: callers should call turnstile_cleanup after
 * dropping the interlock.
 */
boolean_t
lck_mtx_unlock_wakeup(
	lck_mtx_t                       *lck,
	thread_t                        holder)
{
	thread_t                thread = current_thread();
	lck_mtx_t               *mutex = lck;
	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
	struct turnstile *ts;
	kern_return_t did_wake;

#if LOCKS_INDIRECT_ALLOW
	/* indirect mutexes keep the real lock state behind lck_mtx_ptr */
	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
		mutex = &lck->lck_mtx_ptr->lck_mtx;
	}
#endif /* LOCKS_INDIRECT_ALLOW */


	/* only the owning thread may unlock a contended mutex */
	if (thread != holder) {
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p", mutex, holder);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
	    trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);

	ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	if (mutex->lck_mtx_waiters > 1) {
		/* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
	} else {
		/* the last waiter gets no promotion (see the lock boosting invariants above) */
		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
		turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
	}
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);

	mutex->lck_mtx_waiters--;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return mutex->lck_mtx_waiters > 0;
}
1329 
1330 /*
1331  * Routine:     mutex_pause
1332  *
1333  * Called by former callers of simple_lock_pause().
1334  */
#define MAX_COLLISION_COUNTS    32	/* histogram buckets for collision statistics */
#define MAX_COLLISION   8		/* number of distinct back-off delays */

/* histogram of mutex_pause() calls, indexed by the (clamped) collision count */
unsigned int max_collision_count[MAX_COLLISION_COUNTS];

/* back-off delays in microseconds, indexed by the (clamped) collision count */
uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};
1343 
1344 
1345 void
mutex_pause(uint32_t collisions)1346 mutex_pause(uint32_t collisions)
1347 {
1348 	wait_result_t wait_result;
1349 	uint32_t        back_off;
1350 
1351 	if (collisions >= MAX_COLLISION_COUNTS) {
1352 		collisions = MAX_COLLISION_COUNTS - 1;
1353 	}
1354 	max_collision_count[collisions]++;
1355 
1356 	if (collisions >= MAX_COLLISION) {
1357 		collisions = MAX_COLLISION - 1;
1358 	}
1359 	back_off = collision_backoffs[collisions];
1360 
1361 	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1362 	assert(wait_result == THREAD_WAITING);
1363 
1364 	wait_result = thread_block(THREAD_CONTINUE_NULL);
1365 	assert(wait_result == THREAD_TIMED_OUT);
1366 }
1367 
1368 
/* counters: how often lck_mtx_yield() found waiters (and yielded) vs. not */
unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;
1371 
/*
 * Routine:	lck_mtx_yield
 *
 * If the owned mutex has waiters, drop it, pause briefly so a waiter
 * can take it, and re-acquire it; otherwise do nothing.  Updates the
 * mutex_yield_wait / mutex_yield_no_wait counters either way.
 */
void
lck_mtx_yield(
	lck_mtx_t   *lck)
{
	int     waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

#if LOCKS_INDIRECT_ALLOW
	/* indirect mutexes keep the real lock state behind lck_mtx_ptr */
	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	} else
#endif /* LOCKS_INDIRECT_ALLOW */
	{
		waiters = lck->lck_mtx_waiters;
	}

	if (!waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);	/* shortest back-off: collision_backoffs[0] == 10us */
		lck_mtx_lock(lck);
	}
}
1400 
1401 /*
1402  * sleep_with_inheritor and wakeup_with_inheritor KPI
1403  *
1404  * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1405  * the latest thread specified as inheritor.
1406  *
 * Inheritor management is delegated to the caller: the caller stores a thread identifier and passes it to these functions to specify upon whom to
 * direct the push. The inheritor cannot run in user space while holding a push from an event. It is therefore the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1410  *
1411  * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1412  *
 * Turnstiles require a non-blocking primitive as interlock to synchronize manipulation of the turnstile data structure; therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
1417  *
 * All functions will save the turnstile associated with the event in the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to keep interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without interrupts disabled.
1421  * Also:
1422  * - all events on the system that hash on the same bucket will contend on the same spinlock.
1423  * - every event will have a dedicated wait_queue.
1424  *
1425  * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1426  * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1427  */
1428 
/*
 * Routine:	wakeup_with_inheritor_and_turnstile_type
 *
 * Wake one (wake_one == true) or all threads sleeping on event via the
 * sleep_with_inheritor KPI, and transfer or clear the turnstile push.
 *
 * For wake-one: returns KERN_SUCCESS if a thread was woken, else
 * KERN_NOT_WAITING.  If thread_wokenup is non-NULL, the woken thread
 * (with its reference) is handed to the caller; otherwise the
 * reference is dropped here.  LCK_WAKE_DO_NOT_TRANSFER_PUSH leaves the
 * push untransferred.  For wake-all: returns the waitq_wakeup64_all()
 * result and the inheritor is cleared.
 */
kern_return_t
wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;
	int priority;
	thread_t wokeup;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	if (wake_one) {
		if (action == LCK_WAKE_DEFAULT) {
			priority = WAITQ_PROMOTE_ON_WAKE;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
			priority = WAITQ_ALL_PRIORITIES;
		}

		/*
		 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
		if (wokeup != NULL) {
			if (thread_wokenup != NULL) {
				/* hand the woken thread (and its reference) to the caller */
				*thread_wokenup = wokeup;
			} else {
				thread_deallocate_safe(wokeup);
			}
			ret = KERN_SUCCESS;
			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
				goto complete;
			}
		} else {
			if (thread_wokenup != NULL) {
				*thread_wokenup = NULL;
			}
			/* nobody was waiting: clear the inheritor */
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			ret = KERN_NOT_WAITING;
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor or is null, or is a thread that is just been woken up
	 * and have not blocked because it is racing with the same interlock used here
	 * after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However unless the current thread is the old inheritor,
	 * old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We can anyway optimize this by asking turnstile to tell us
	 * if old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1512 
/*
 * Routine:	sleep_with_inheritor_and_turnstile_type
 *
 * Core of the sleep_with_inheritor KPI: park the calling thread on the
 * turnstile hashed from event, directing its scheduling push at
 * inheritor until it is woken or the deadline expires.
 *
 * primitive_unlock() is invoked only after the turnstile hash-bucket
 * spinlock has been taken, so no wakeup can be lost in the hand-off;
 * primitive_lock() is invoked just before returning.  Pass empty
 * blocks to skip either step.
 */
static wait_result_t
sleep_with_inheritor_and_turnstile_type(event_t event,
    thread_t inheritor,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    turnstile_type_t type,
    void (^primitive_lock)(void),
    void (^primitive_unlock)(void))
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	primitive_lock();

	return ret;
}
1568 
/*
 * Routine:	change_sleep_inheritor_and_turnstile_type
 *
 * Redirect the push of all threads currently sleeping on event to a
 * new inheritor thread.  Returns KERN_NOT_WAITING if the turnstile has
 * no waiters (the inheritor is still updated), KERN_SUCCESS otherwise.
 */
kern_return_t
change_sleep_inheritor_and_turnstile_type(event_t event,
    thread_t inheritor,
    turnstile_type_t type)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret =  KERN_SUCCESS;
	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1610 
1611 typedef void (^void_block_void)(void);
1612 
1613 /*
1614  * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1615  */
1616 
1617 wait_result_t
lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline,turnstile_type_t type)1618 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1619 {
1620 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1621 
1622 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1623 		return sleep_with_inheritor_and_turnstile_type(event,
1624 		           inheritor,
1625 		           interruptible,
1626 		           deadline,
1627 		           type,
1628 		           ^{;},
1629 		           ^{lck_mtx_unlock(lock);});
1630 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1631 		return sleep_with_inheritor_and_turnstile_type(event,
1632 		           inheritor,
1633 		           interruptible,
1634 		           deadline,
1635 		           type,
1636 		           ^{lck_mtx_lock_spin(lock);},
1637 		           ^{lck_mtx_unlock(lock);});
1638 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1639 		return sleep_with_inheritor_and_turnstile_type(event,
1640 		           inheritor,
1641 		           interruptible,
1642 		           deadline,
1643 		           type,
1644 		           ^{lck_mtx_lock_spin_always(lock);},
1645 		           ^{lck_mtx_unlock(lock);});
1646 	} else {
1647 		return sleep_with_inheritor_and_turnstile_type(event,
1648 		           inheritor,
1649 		           interruptible,
1650 		           deadline,
1651 		           type,
1652 		           ^{lck_mtx_lock(lock);},
1653 		           ^{lck_mtx_unlock(lock);});
1654 	}
1655 }
1656 
1657 /*
1658  * Name: lck_spin_sleep_with_inheritor
1659  *
1660  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1661  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1662  *              be directed to the inheritor specified.
1663  *              An interruptible mode and deadline can be specified to return earlier from the wait.
1664  *
1665  * Args:
1666  *   Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reaquired before returning according to the sleep action specified.
1667  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1668  *   Arg3: event to wait on.
1669  *   Arg4: thread to propagate the event push to.
1670  *   Arg5: interruptible flag for wait.
1671  *   Arg6: deadline for wait.
1672  *
1673  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1674  *             Lock will be dropped while waiting.
1675  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1676  *             wakeup for the event is called.
1677  *
1678  * Returns: result of the wait.
1679  */
1680 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1681 lck_spin_sleep_with_inheritor(
1682 	lck_spin_t *lock,
1683 	lck_sleep_action_t lck_sleep_action,
1684 	event_t event,
1685 	thread_t inheritor,
1686 	wait_interrupt_t interruptible,
1687 	uint64_t deadline)
1688 {
1689 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1690 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1691 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1692 		           ^{}, ^{ lck_spin_unlock(lock); });
1693 	} else {
1694 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1695 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1696 		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1697 	}
1698 }
1699 
1700 /*
1701  * Name: lck_ticket_sleep_with_inheritor
1702  *
1703  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1704  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1705  *              be directed to the inheritor specified.
1706  *              An interruptible mode and deadline can be specified to return earlier from the wait.
1707  *
1708  * Args:
1709  *   Arg1: lck_ticket_t lock used to protect the sleep. The lock will be dropped while sleeping and reaquired before returning according to the sleep action specified.
1710  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1711  *   Arg3: event to wait on.
1712  *   Arg4: thread to propagate the event push to.
1713  *   Arg5: interruptible flag for wait.
1714  *   Arg6: deadline for wait.
1715  *
1716  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1717  *             Lock will be dropped while waiting.
1718  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1719  *             wakeup for the event is called.
1720  *
1721  * Returns: result of the wait.
1722  */
1723 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1724 lck_ticket_sleep_with_inheritor(
1725 	lck_ticket_t *lock,
1726 	lck_grp_t *grp,
1727 	lck_sleep_action_t lck_sleep_action,
1728 	event_t event,
1729 	thread_t inheritor,
1730 	wait_interrupt_t interruptible,
1731 	uint64_t deadline)
1732 {
1733 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1734 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1735 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1736 		           ^{}, ^{ lck_ticket_unlock(lock); });
1737 	} else {
1738 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1739 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1740 		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1741 	}
1742 }
1743 
1744 /*
1745  * Name: lck_mtx_sleep_with_inheritor
1746  *
1747  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1748  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1749  *              be directed to the inheritor specified.
1750  *              An interruptible mode and deadline can be specified to return earlier from the wait.
1751  *
1752  * Args:
1753  *   Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reaquired before returning according to the sleep action specified.
1754  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
1755  *   Arg3: event to wait on.
1756  *   Arg4: thread to propagate the event push to.
1757  *   Arg5: interruptible flag for wait.
1758  *   Arg6: deadline for wait.
1759  *
1760  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1761  *             Lock will be dropped while waiting.
1762  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1763  *             wakeup for the event is called.
1764  *
1765  * Returns: result of the wait.
1766  */
wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
{
	/* use the default (non-reader-writer) sleep-inheritor turnstile type */
	return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
}
1772 
1773 /*
1774  * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1775  */
1776 
/*
 * Routine:	lck_rw_sleep_with_inheritor_and_turnstile_type
 *
 * sleep_with_inheritor bridging for lck_rw_t.  The lock is always
 * dropped via lck_rw_done(), whose return value is captured into
 * lck_rw_type (__block: written by the unlock block, which runs before
 * the relock block) so the default path can re-acquire in the mode the
 * lock was actually held in.  LCK_SLEEP_EXCLUSIVE / LCK_SLEEP_SHARED
 * force the re-acquire mode instead; LCK_SLEEP_UNLOCK skips it.
 * The EXCLUSIVE initializer below is defensive only — the unlock block
 * overwrites it before the relock path ever reads it.
 */
wait_result_t
lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
{
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return sleep_with_inheritor_and_turnstile_type(event,
		           inheritor,
		           interruptible,
		           deadline,
		           type,
		           ^{;},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		return sleep_with_inheritor_and_turnstile_type(event,
		           inheritor,
		           interruptible,
		           deadline,
		           type,
		           ^{lck_rw_lock(lock, lck_rw_type);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return sleep_with_inheritor_and_turnstile_type(event,
		           inheritor,
		           interruptible,
		           deadline,
		           type,
		           ^{lck_rw_lock_exclusive(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	} else {
		return sleep_with_inheritor_and_turnstile_type(event,
		           inheritor,
		           interruptible,
		           deadline,
		           type,
		           ^{lck_rw_lock_shared(lock);},
		           ^{lck_rw_type = lck_rw_done(lock);});
	}
}
1818 
1819 /*
1820  * Name: lck_rw_sleep_with_inheritor
1821  *
1822  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1823  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1824  *              be directed to the inheritor specified.
1825  *              An interruptible mode and deadline can be specified to return earlier from the wait.
1826  *
1827  * Args:
 *   Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1829  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
1830  *   Arg3: event to wait on.
1831  *   Arg4: thread to propagate the event push to.
1832  *   Arg5: interruptible flag for wait.
1833  *   Arg6: deadline for wait.
1834  *
1835  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1836  *             Lock will be dropped while waiting.
1837  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1838  *             wakeup for the event is called.
1839  *
1840  * Returns: result of the wait.
1841  */
1842 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1843 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
1844 {
1845 	return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1846 }
1847 
1848 /*
1849  * wakeup_with_inheritor functions are independent from the locking primitive.
1850  */
1851 
1852 /*
1853  * Name: wakeup_one_with_inheritor
1854  *
1855  * Description: wake up one waiter for event if any. The thread woken up will be the one with the higher sched priority waiting on event.
1856  *              The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
1857  *              If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
1858  *
1859  * Args:
1860  *   Arg1: event to wake from.
1861  *   Arg2: wait result to pass to the woken up thread.
1862  *   Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
 *   Arg4: pointer for storing the thread woken up.
1864  *
1865  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1866  *
1867  * Conditions: The new inheritor wokenup cannot run in user space until another inheritor is specified for the event or a
1868  *             wakeup for the event is called.
1869  *             A reference for the wokenup thread is acquired.
1870  *             NOTE: this cannot be called from interrupt context.
1871  */
1872 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1873 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1874 {
1875 	return wakeup_with_inheritor_and_turnstile_type(event,
1876 	           TURNSTILE_SLEEP_INHERITOR,
1877 	           result,
1878 	           TRUE,
1879 	           action,
1880 	           thread_wokenup);
1881 }
1882 
1883 /*
1884  * Name: wakeup_all_with_inheritor
1885  *
1886  * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
1887  *
1888  * Args:
1889  *   Arg1: event to wake from.
1890  *   Arg2: wait result to pass to the woken up threads.
1891  *
1892  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1893  *
1894  * Conditions: NOTE: this cannot be called from interrupt context.
1895  */
1896 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1897 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1898 {
1899 	return wakeup_with_inheritor_and_turnstile_type(event,
1900 	           TURNSTILE_SLEEP_INHERITOR,
1901 	           result,
1902 	           FALSE,
1903 	           0,
1904 	           NULL);
1905 }
1906 
1907 /*
1908  * change_sleep_inheritor is independent from the locking primitive.
1909  */
1910 
1911 /*
1912  * Name: change_sleep_inheritor
1913  *
1914  * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1915  *
1916  * Args:
1917  *   Arg1: event to redirect the push.
1918  *   Arg2: new inheritor for event.
1919  *
1920  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1921  *
1922  * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
1923  *             wakeup for the event is called.
1924  *             NOTE: this cannot be called from interrupt context.
1925  */
1926 kern_return_t
change_sleep_inheritor(event_t event,thread_t inheritor)1927 change_sleep_inheritor(event_t event, thread_t inheritor)
1928 {
1929 	return change_sleep_inheritor_and_turnstile_type(event,
1930 	           inheritor,
1931 	           TURNSTILE_SLEEP_INHERITOR);
1932 }
1933 
/*
 * Debugger (kdp) callback: report the inheritor of a sleep_with_inheritor
 * event as the "owner" in the thread wait info.
 */
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	waitinfo->owner = 0;
	waitinfo->context = 0;

	/*
	 * If the waitq interlock is held, the turnstile state is in flux and
	 * cannot be inspected safely from debugger context: report no owner.
	 */
	if (waitq_held(waitq)) {
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1950 
/*
 * gate_t state is packed into the single word gt_data:
 *   bit 0           - interlock (spin lock bit)
 *   bit 1           - waiters present
 *   remaining bits  - pointer to the holder thread (or 0 when open)
 */
#define GATE_TYPE        3	/* tag stored in gt_type to detect invalid gates */
#define GATE_ILOCK_BIT   0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Interlock acquire/release on the low bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* Extract/insert the holder thread pointer from/into the state word. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel "holder" value stored on destroy so stale users panic. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate)     ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred-work callback returned by the turnstile helpers below. */
typedef void (*void_func_void)(void);
1975 
/* Panic: gt_type tag mismatch — memory is not (or no longer) a valid gate. */
__abortlike
static void
gate_verify_tag_panic(gate_t *gate)
{
	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1982 
/* Panic: gate was used after gate_destroy()/gate_free(). */
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1989 
/*
 * Sanity-check a gate before use: valid type tag, not destroyed,
 * at least one reference outstanding. Panics on violation.
 */
static void
gate_verify(gate_t *gate)
{
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	assert(gate->gt_refs > 0);
}
2002 
/* Panic: attempt to close a gate that is already closed (held). */
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
2009 
2010 static kern_return_t
gate_try_close(gate_t * gate)2011 gate_try_close(gate_t *gate)
2012 {
2013 	uintptr_t state;
2014 	thread_t holder;
2015 	kern_return_t ret;
2016 	thread_t thread = current_thread();
2017 
2018 	gate_verify(gate);
2019 
2020 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2021 		return KERN_SUCCESS;
2022 	}
2023 
2024 	gate_ilock(gate);
2025 	state = ordered_load_gate(gate);
2026 	holder = GATE_STATE_TO_THREAD(state);
2027 
2028 	if (holder == NULL) {
2029 		assert(gate_has_waiter_bit(state) == FALSE);
2030 
2031 		state = GATE_THREAD_TO_STATE(current_thread());
2032 		state |= GATE_ILOCK;
2033 		ordered_store_gate(gate, state);
2034 		ret = KERN_SUCCESS;
2035 	} else {
2036 		if (holder == current_thread()) {
2037 			gate_already_owned_panic(gate, holder);
2038 		}
2039 		ret = KERN_FAILURE;
2040 	}
2041 
2042 	gate_iunlock(gate);
2043 	return ret;
2044 }
2045 
/*
 * Close the gate: the current thread becomes the holder.
 * Panics if the gate is already closed by any thread.
 */
static void
gate_close(gate_t* gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();

	gate_verify(gate);

	/* Fast path: gate fully open (state 0) -> become holder atomically. */
	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	/* Slow path: take the interlock and re-examine the state. */
	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		gate_already_owned_panic(gate, holder);
	}

	/* Open gate cannot have waiters. */
	assert(gate_has_waiter_bit(state) == FALSE);

	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2075 
/*
 * Wake all threads waiting on the gate and clear the turnstile
 * inheritor. Called with the gate interlock held, only when the
 * waiters bit is set.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2095 
/* Panic: open/handoff attempted by a thread that is not the holder. */
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2106 
/*
 * Open the gate and wake all waiters.
 * Must be called by the current holder; panics otherwise.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: held by me with no waiters -> clear state atomically. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	/* Leave only the interlock bit; gate_iunlock clears it. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2138 
/*
 * Turnstile side of gate_handoff(): wake the highest-priority waiter
 * and make it the new holder/inheritor.
 * Called with the gate interlock held.
 *
 * On KERN_SUCCESS *thread_woken_up holds a reference to the new holder
 * (caller must thread_deallocate it) and *waiters reports whether more
 * threads remain queued. Returns KERN_NOT_WAITING if no waiter was found.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2201 
/*
 * Hand off the gate to the highest-priority waiter, or (with
 * GATE_HANDOFF_OPEN_IF_NO_WAITERS) open it when no waiter exists.
 * Must be called by the current holder; panics otherwise.
 *
 * Returns KERN_SUCCESS if ownership transferred, KERN_NOT_WAITING if
 * there was no waiter (gate left closed unless OPEN_IF_NO_WAITERS).
 */
static kern_return_t
gate_handoff(gate_t *gate,
    int flags)
{
	kern_return_t ret;
	thread_t new_holder = NULL;
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
	gate_verify(gate);

	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
		/* Fast path: held by me, no waiters -> open atomically. */
		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
			return KERN_NOT_WAITING;
		}
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != current_thread()) {
		gate_not_owned_panic(gate, holder, false);
	}

	if (waiters) {
		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
		if (ret == KERN_SUCCESS) {
			/* New holder installed; keep waiters bit if more remain. */
			state = GATE_THREAD_TO_STATE(new_holder);
			if (waiters) {
				state |= GATE_WAITERS;
			}
		} else {
			/* No waiter actually found on the waitq. */
			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
				state = 0;
			}
		}
	} else {
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			state = 0;
		}
		ret = KERN_NOT_WAITING;
	}
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);

	/* Drop the reference taken by waitq_wakeup64_identify(). */
	if (new_holder) {
		thread_deallocate(new_holder);
	}
	return ret;
}
2260 
/*
 * Turnstile side of gate_steal(): redirect the waiters' push to
 * new_inheritor. Called with the gate interlock held.
 *
 * Returns turnstile_cleanup, which the caller must invoke after
 * dropping the interlock.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2279 
/* Panic: wait/steal attempted on a gate that is not closed. */
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2290 
/*
 * Forcibly take ownership of a closed gate from its current holder.
 * The current thread becomes holder and, if waiters exist, the new
 * turnstile inheritor. Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	/* Deferred turnstile work to run after dropping the interlock. */
	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep waiters bit and retarget the push to the new holder. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2325 
/*
 * Turnstile side of gate_wait(): block on the gate's turnstile,
 * pushing on holder while asleep.
 *
 * Entered with the gate interlock held; drops it across the block and
 * re-acquires it before returning. *wait receives the thread_block()
 * result and *waiters whether the turnstile still has waiters.
 *
 * Returns turnstile_cleanup, which the caller must invoke after
 * dropping the interlock.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
		    holder == current_thread())) {     // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2379 
/* Release the gate memory; only valid for gates from gate_alloc_init(). */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2385 
/* Panic: gt_refs would overflow its 16-bit counter. */
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2392 
/*
 * Wait for a closed gate to be opened or handed off.
 *
 * primitive_unlock/primitive_lock are blocks that drop and re-acquire
 * the external lock protecting the gate, since the turnstile cannot
 * sleep with a blocking primitive held as interlock.
 *
 * Returns GATE_HANDOFF if this thread became the holder, GATE_OPENED
 * if the gate was opened (or destroyed while waiting on an allocated
 * gate), GATE_INTERRUPTED / GATE_TIMED_OUT on early wakeup.
 * Panics if called on an open gate.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	/* Back from the sleep, holding the gate interlock again. */
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Refresh the waiters bit from the turnstile's view. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the ref taken before sleeping; remember if we must free. */
	assert(gate->gt_refs > 0);
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/*
	 * The gate was destroyed while we slept: for an allocated gate the
	 * last waiter out frees it; for an embedded gate this is a bug.
	 */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2516 
2517 static void
gate_assert(gate_t * gate,int flags)2518 gate_assert(gate_t *gate, int flags)
2519 {
2520 	uintptr_t state;
2521 	thread_t holder;
2522 
2523 	gate_verify(gate);
2524 
2525 	gate_ilock(gate);
2526 	state = ordered_load_gate(gate);
2527 	holder = GATE_STATE_TO_THREAD(state);
2528 
2529 	switch (flags) {
2530 	case GATE_ASSERT_CLOSED:
2531 		assert(holder != NULL);
2532 		break;
2533 	case GATE_ASSERT_OPEN:
2534 		assert(holder == NULL);
2535 		break;
2536 	case GATE_ASSERT_HELD:
2537 		assert(holder == current_thread());
2538 		break;
2539 	default:
2540 		panic("invalid %s flag %d", __func__, flags);
2541 	}
2542 
2543 	gate_iunlock(gate);
2544 }
2545 
/* gate_init() variants: embedded in another structure vs heap allocated. */
enum {
	GT_INIT_DEFAULT = 0,	/* embedded gate (lck_*_gate_init) */
	GT_INIT_ALLOC		/* allocated via gate_alloc_init(); sets gt_alloc */
};
2550 
2551 static void
gate_init(gate_t * gate,uint type)2552 gate_init(gate_t *gate, uint type)
2553 {
2554 	bzero(gate, sizeof(gate_t));
2555 
2556 	gate->gt_data = 0;
2557 	gate->gt_turnstile = NULL;
2558 	gate->gt_refs = 1;
2559 	switch (type) {
2560 	case GT_INIT_ALLOC:
2561 		gate->gt_alloc = 1;
2562 		break;
2563 	default:
2564 		gate->gt_alloc = 0;
2565 		break;
2566 	}
2567 	gate->gt_type = GATE_TYPE;
2568 	gate->gt_flags_pad = 0;
2569 }
2570 
2571 static gate_t*
gate_alloc_init(void)2572 gate_alloc_init(void)
2573 {
2574 	gate_t *gate;
2575 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2576 	gate_init(gate, GT_INIT_ALLOC);
2577 	return gate;
2578 }
2579 
/* Panic: destroy attempted while the gate is still held. */
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2586 
/* Panic: destroy attempted while threads are still waiting on the gate. */
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2593 
/*
 * Mark a gate destroyed and drop the init reference.
 * The gate must be open with no waiters; panics otherwise.
 * Returns the remaining reference count (non-zero when woken-up
 * threads still need to touch the gate on their way out).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still need
	 * to be available to let the
	 * last wokenup threads to clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2634 
/* Panic: gate_destroy() called on a gate from gate_alloc_init(). */
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2641 
/*
 * Destroy an embedded gate (one initialized with GT_INIT_DEFAULT).
 * Panics if the gate was heap allocated — gate_free() must be used there.
 */
static void
gate_destroy(gate_t *gate)
{
	gate_verify(gate);
	if (gate->gt_alloc == 1) {
		gate_destroy_panic(gate);
	}
	gate_destroy_internal(gate);
}
2651 
/* Panic: gate_free() called on a gate not from gate_alloc_init(). */
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2658 
/*
 * Destroy and (when safe) free a gate obtained from gate_alloc_init().
 * If woken-up waiters still hold references, the last of them frees
 * the memory instead (see gate_wait()).
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2682 
2683 /*
2684  * Name: lck_rw_gate_init
2685  *
2686  * Description: initializes a variable declared with decl_lck_rw_gate_data.
2687  *
2688  * Args:
2689  *   Arg1: lck_rw_t lock used to protect the gate.
2690  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2691  */
void
lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
{
	/* The lock only documents which primitive protects the gate. */
	(void) lock;
	gate_init(gate, GT_INIT_DEFAULT);
}
2698 
2699 /*
2700  * Name: lck_rw_gate_alloc_init
2701  *
2702  * Description: allocates and initializes a gate_t.
2703  *
2704  * Args:
2705  *   Arg1: lck_rw_t lock used to protect the gate.
2706  *
2707  * Returns:
2708  *         gate_t allocated.
2709  */
gate_t*
lck_rw_gate_alloc_init(lck_rw_t *lock)
{
	/* The lock only documents which primitive protects the gate. */
	(void) lock;
	return gate_alloc_init();
}
2716 
2717 /*
2718  * Name: lck_rw_gate_destroy
2719  *
2720  * Description: destroys a variable previously initialized
2721  *              with lck_rw_gate_init().
2722  *
2723  * Args:
2724  *   Arg1: lck_rw_t lock used to protect the gate.
2725  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2726  */
void
lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
{
	/* The lock only documents which primitive protects the gate. */
	(void) lock;
	gate_destroy(gate);
}
2733 
2734 /*
2735  * Name: lck_rw_gate_free
2736  *
2737  * Description: destroys and tries to free a gate previously allocated
2738  *              with lck_rw_gate_alloc_init().
2739  *              The gate free might be delegated to the last thread returning
2740  *              from the gate_wait().
2741  *
2742  * Args:
2743  *   Arg1: lck_rw_t lock used to protect the gate.
2744  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2745  */
2746 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2747 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2748 {
2749 	(void) lock;
2750 	gate_free(gate);
2751 }
2752 
2753 /*
2754  * Name: lck_rw_gate_try_close
2755  *
2756  * Description: Tries to close the gate.
2757  *              In case of success the current thread will be set as
2758  *              the holder of the gate.
2759  *
2760  * Args:
2761  *   Arg1: lck_rw_t lock used to protect the gate.
2762  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2763  *
2764  * Conditions: Lock must be held. Returns with the lock held.
2765  *
2766  * Returns:
2767  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2768  *          of the gate.
2769  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2770  *          to wake up possible waiters on the gate before returning to userspace.
2771  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2772  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2773  *
2774  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2775  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2776  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2777  *          be done without dropping the lock that is protecting the gate in between.
2778  */
2779 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2780 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2781 {
2782 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2783 
2784 	return gate_try_close(gate);
2785 }
2786 
2787 /*
2788  * Name: lck_rw_gate_close
2789  *
2790  * Description: Closes the gate. The current thread will be set as
2791  *              the holder of the gate. Will panic if the gate is already closed.
2792  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2793  *              to wake up possible waiters on the gate before returning to userspace.
2794  *
2795  * Args:
2796  *   Arg1: lck_rw_t lock used to protect the gate.
2797  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2798  *
2799  * Conditions: Lock must be held. Returns with the lock held.
2800  *             The gate must be open.
2801  *
2802  */
2803 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2804 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2805 {
2806 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2807 
2808 	return gate_close(gate);
2809 }
2810 
2811 /*
2812  * Name: lck_rw_gate_open
2813  *
2814  * Description: Opens the gate and wakes up possible waiters.
2815  *
2816  * Args:
2817  *   Arg1: lck_rw_t lock used to protect the gate.
2818  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2819  *
2820  * Conditions: Lock must be held. Returns with the lock held.
2821  *             The current thread must be the holder of the gate.
2822  *
2823  */
2824 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2825 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2826 {
2827 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2828 
2829 	gate_open(gate);
2830 }
2831 
2832 /*
2833  * Name: lck_rw_gate_handoff
2834  *
2835  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2836  *              priority will be selected as the new holder of the gate, and woken up,
2837  *              with the gate remaining in the closed state throughout.
2838  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2839  *              will be returned.
2840  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2841  *              case no waiters were found.
2842  *
2843  *
2844  * Args:
2845  *   Arg1: lck_rw_t lock used to protect the gate.
2846  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2847  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2848  *
2849  * Conditions: Lock must be held. Returns with the lock held.
2850  *             The current thread must be the holder of the gate.
2851  *
2852  * Returns:
2853  *          KERN_SUCCESS in case one of the waiters became the new holder.
2854  *          KERN_NOT_WAITING in case there were no waiters.
2855  *
2856  */
2857 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2858 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2859 {
2860 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2861 
2862 	return gate_handoff(gate, flags);
2863 }
2864 
2865 /*
2866  * Name: lck_rw_gate_steal
2867  *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 *              new holder of the gate.
2870  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2871  *              to wake up possible waiters on the gate before returning to userspace.
2872  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2873  *              anymore.
2874  *
2875  *
2876  * Args:
2877  *   Arg1: lck_rw_t lock used to protect the gate.
2878  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2879  *
2880  * Conditions: Lock must be held. Returns with the lock held.
2881  *             The gate must be closed and the current thread must not already be the holder.
2882  *
2883  */
2884 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2885 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2886 {
2887 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2888 
2889 	gate_steal(gate);
2890 }
2891 
2892 /*
2893  * Name: lck_rw_gate_wait
2894  *
2895  * Description: Waits for the current thread to become the holder of the gate or for the
2896  *              gate to become open. An interruptible mode and deadline can be specified
2897  *              to return earlier from the wait.
2898  *
2899  * Args:
2900  *   Arg1: lck_rw_t lock used to protect the gate.
2901  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
2905  *
2906  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2907  *             Lock will be dropped while waiting.
2908  *             The gate must be closed.
2909  *
2910  * Returns: Reason why the thread was woken up.
2911  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *                         to wake up possible waiters on the gate before returning to userspace.
2914  *          GATE_OPENED - the gate was opened by the holder.
2915  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
2916  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
2917  */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	/*
	 * lck_rw_done() reports whether the lock was held shared or
	 * exclusive; capture it (via __block so the unlock block can
	 * write it) so the default case can re-take the lock in the
	 * same mode it was held on entry.
	 */
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	/*
	 * gate_wait() takes two blocks: one run to drop the lock before
	 * sleeping, one run to re-acquire it on wakeup. The sleep action
	 * selects the re-acquire behavior.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		/* Default: re-acquire in whatever mode was held on entry. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		/* Explicitly requested exclusive re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		/* Remaining case: LCK_SLEEP_SHARED re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
2951 
2952 /*
2953  * Name: lck_rw_gate_assert
2954  *
2955  * Description: asserts that the gate is in the specified state.
2956  *
2957  * Args:
2958  *   Arg1: lck_rw_t lock used to protect the gate.
2959  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2960  *   Arg3: flags to specified assert type.
2961  *         GATE_ASSERT_CLOSED - the gate is currently closed
2962  *         GATE_ASSERT_OPEN - the gate is currently opened
2963  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2964  */
2965 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2966 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2967 {
2968 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2969 
2970 	gate_assert(gate, flags);
2971 	return;
2972 }
2973 
2974 /*
2975  * Name: lck_mtx_gate_init
2976  *
2977  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2978  *
2979  * Args:
2980  *   Arg1: lck_mtx_t lock used to protect the gate.
2981  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2982  */
2983 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2984 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2985 {
2986 	(void) lock;
2987 	gate_init(gate, GT_INIT_DEFAULT);
2988 }
2989 
2990 /*
2991  * Name: lck_mtx_gate_alloc_init
2992  *
2993  * Description: allocates and initializes a gate_t.
2994  *
2995  * Args:
2996  *   Arg1: lck_mtx_t lock used to protect the gate.
2997  *
2998  * Returns:
2999  *         gate_t allocated.
3000  */
3001 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)3002 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3003 {
3004 	(void) lock;
3005 	return gate_alloc_init();
3006 }
3007 
3008 /*
3009  * Name: lck_mtx_gate_destroy
3010  *
3011  * Description: destroys a variable previously initialized
3012  *              with lck_mtx_gate_init().
3013  *
3014  * Args:
3015  *   Arg1: lck_mtx_t lock used to protect the gate.
3016  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3017  */
3018 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)3019 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3020 {
3021 	(void) lock;
3022 	gate_destroy(gate);
3023 }
3024 
3025 /*
3026  * Name: lck_mtx_gate_free
3027  *
3028  * Description: destroys and tries to free a gate previously allocated
 *              with lck_mtx_gate_alloc_init().
3030  *              The gate free might be delegated to the last thread returning
3031  *              from the gate_wait().
3032  *
3033  * Args:
3034  *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3036  */
3037 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3038 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3039 {
3040 	(void) lock;
3041 	gate_free(gate);
3042 }
3043 
3044 /*
3045  * Name: lck_mtx_gate_try_close
3046  *
3047  * Description: Tries to close the gate.
3048  *              In case of success the current thread will be set as
3049  *              the holder of the gate.
3050  *
3051  * Args:
3052  *   Arg1: lck_mtx_t lock used to protect the gate.
3053  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3054  *
3055  * Conditions: Lock must be held. Returns with the lock held.
3056  *
3057  * Returns:
3058  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3059  *          of the gate.
3060  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3061  *          to wake up possible waiters on the gate before returning to userspace.
3062  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3063  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3064  *
3065  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3066  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3067  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3068  *          be done without dropping the lock that is protecting the gate in between.
3069  */
3070 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3071 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3072 {
3073 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3074 
3075 	return gate_try_close(gate);
3076 }
3077 
3078 /*
3079  * Name: lck_mtx_gate_close
3080  *
3081  * Description: Closes the gate. The current thread will be set as
3082  *              the holder of the gate. Will panic if the gate is already closed.
3083  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3084  *              to wake up possible waiters on the gate before returning to userspace.
3085  *
3086  * Args:
3087  *   Arg1: lck_mtx_t lock used to protect the gate.
3088  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3089  *
3090  * Conditions: Lock must be held. Returns with the lock held.
3091  *             The gate must be open.
3092  *
3093  */
3094 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3095 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3096 {
3097 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3098 
3099 	return gate_close(gate);
3100 }
3101 
3102 /*
3103  * Name: lck_mtx_gate_open
3104  *
 * Description: Opens the gate and wakes up possible waiters.
3106  *
3107  * Args:
3108  *   Arg1: lck_mtx_t lock used to protect the gate.
3109  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3110  *
3111  * Conditions: Lock must be held. Returns with the lock held.
3112  *             The current thread must be the holder of the gate.
3113  *
3114  */
3115 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3116 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3117 {
3118 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3119 
3120 	gate_open(gate);
3121 }
3122 
3123 /*
3124  * Name: lck_mtx_gate_handoff
3125  *
3126  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3127  *              priority will be selected as the new holder of the gate, and woken up,
3128  *              with the gate remaining in the closed state throughout.
3129  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3130  *              will be returned.
3131  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3132  *              case no waiters were found.
3133  *
3134  *
3135  * Args:
3136  *   Arg1: lck_mtx_t lock used to protect the gate.
3137  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3138  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3139  *
3140  * Conditions: Lock must be held. Returns with the lock held.
3141  *             The current thread must be the holder of the gate.
3142  *
3143  * Returns:
3144  *          KERN_SUCCESS in case one of the waiters became the new holder.
3145  *          KERN_NOT_WAITING in case there were no waiters.
3146  *
3147  */
3148 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3149 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3150 {
3151 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3152 
3153 	return gate_handoff(gate, flags);
3154 }
3155 
3156 /*
3157  * Name: lck_mtx_gate_steal
3158  *
3159  * Description: Steals the ownership of the gate. It sets the current thread as the
3160  *              new holder of the gate.
3161  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3162  *              to wake up possible waiters on the gate before returning to userspace.
3163  *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3164  *              anymore.
3165  *
3166  *
3167  * Args:
3168  *   Arg1: lck_mtx_t lock used to protect the gate.
3169  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3170  *
3171  * Conditions: Lock must be held. Returns with the lock held.
3172  *             The gate must be closed and the current thread must not already be the holder.
3173  *
3174  */
3175 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3176 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3177 {
3178 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3179 
3180 	gate_steal(gate);
3181 }
3182 
3183 /*
3184  * Name: lck_mtx_gate_wait
3185  *
3186  * Description: Waits for the current thread to become the holder of the gate or for the
3187  *              gate to become open. An interruptible mode and deadline can be specified
3188  *              to return earlier from the wait.
3189  *
3190  * Args:
3191  *   Arg1: lck_mtx_t lock used to protect the gate.
3192  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline
3196  *
3197  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3198  *             Lock will be dropped while waiting.
3199  *             The gate must be closed.
3200  *
3201  * Returns: Reason why the thread was woken up.
3202  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3203  *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3204  *                         to wake up possible waiters on the gate before returning to userspace.
3205  *          GATE_OPENED - the gate was opened by the holder.
3206  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3207  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3208  */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * gate_wait() takes two blocks: one run to drop the mutex before
	 * sleeping, one run to re-acquire it on wakeup. The sleep action
	 * selects the re-acquire behavior.
	 */
	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Caller wants to return unlocked: drop and never re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		/* Re-acquire as a spin mutex. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		/* Re-acquire in always-spin mode. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		/* Default: plain blocking mutex re-acquire. */
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
3240 
3241 /*
3242  * Name: lck_mtx_gate_assert
3243  *
3244  * Description: asserts that the gate is in the specified state.
3245  *
3246  * Args:
3247  *   Arg1: lck_mtx_t lock used to protect the gate.
3248  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3249  *   Arg3: flags to specified assert type.
3250  *         GATE_ASSERT_CLOSED - the gate is currently closed
3251  *         GATE_ASSERT_OPEN - the gate is currently opened
3252  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3253  */
3254 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3255 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3256 {
3257 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3258 
3259 	gate_assert(gate, flags);
3260 }
3261 
3262 #pragma mark - LCK_*_DECLARE support
3263 
3264 __startup_func
3265 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3266 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3267 {
3268 	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3269 }
3270 
3271 __startup_func
3272 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3273 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3274 {
3275 	if (sp->lck_ext) {
3276 		lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
3277 	} else {
3278 		lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3279 	}
3280 }
3281 
3282 __startup_func
3283 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3284 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3285 {
3286 	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3287 }
3288 
3289 __startup_func
3290 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3291 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3292 {
3293 	simple_lock_init(sp->lck, sp->lck_init_arg);
3294 }
3295