xref: /xnu-8019.80.24/osfmk/kern/locks.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #define LOCK_PRIVATE 1
58 
59 #include <mach_ldebug.h>
60 #include <debug.h>
61 
62 #include <mach/kern_return.h>
63 #include <mach/mach_host_server.h>
64 #include <mach_debug/lockgroup_info.h>
65 
66 #include <kern/lock_stat.h>
67 #include <kern/locks.h>
68 #include <kern/misc_protos.h>
69 #include <kern/zalloc.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <libkern/section_keywords.h>
75 #include <machine/atomic.h>
76 #include <machine/machine_cpu.h>
77 #include <string.h>
78 
79 #include <sys/kdebug.h>
80 
81 #define LCK_MTX_SLEEP_CODE              0
82 #define LCK_MTX_SLEEP_DEADLINE_CODE     1
83 #define LCK_MTX_LCK_WAIT_CODE           2
84 #define LCK_MTX_UNLCK_WAKEUP_CODE       3
85 
86 #if MACH_LDEBUG
87 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
88 #else
89 #define ALIGN_TEST(p, t) do{}while(0)
90 #endif
91 
92 #define NOINLINE                __attribute__((noinline))
93 
94 #define ordered_load_hw(lock)          os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
95 #define ordered_store_hw(lock, value)  os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
96 
97 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
98 
99 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
100 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
101 queue_head_t     lck_grp_queue;
102 unsigned int     lck_grp_cnt;
103 
104 decl_lck_mtx_data(, lck_grp_lock);
105 static lck_mtx_ext_t lck_grp_lock_ext;
106 
107 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
108 
109 /* Obtain "lcks" options: this currently controls lock statistics */
110 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
111 
112 KALLOC_TYPE_DEFINE(KT_LCK_GRP_ATTR, lck_grp_attr_t, KT_PRIV_ACCT);
113 
114 KALLOC_TYPE_DEFINE(KT_LCK_GRP, lck_grp_t, KT_PRIV_ACCT);
115 
116 KALLOC_TYPE_DEFINE(KT_LCK_ATTR, lck_attr_t, KT_PRIV_ACCT);
117 
118 lck_grp_t       LockCompatGroup;
119 SECURITY_READ_ONLY_LATE(lck_attr_t)      LockDefaultLckAttr;
120 
121 #if CONFIG_DTRACE
122 #if defined (__x86_64__)
123 uint32_t _Atomic dtrace_spin_threshold = 500; // 500ns
124 #define lock_enable_preemption enable_preemption
125 #elif defined(__arm__) || defined(__arm64__)
126 MACHINE_TIMEOUT32(dtrace_spin_threshold, "dtrace-spin-threshold",
127     0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
128 #endif
129 #endif
130 
131 uintptr_t
132 unslide_for_kdebug(void* object)
133 {
134 	if (__improbable(kdebug_enable)) {
135 		return VM_KERNEL_UNSLIDE_OR_PERM(object);
136 	} else {
137 		return 0;
138 	}
139 }
140 
141 __startup_func
142 static void
143 lck_mod_init(void)
144 {
145 	queue_init(&lck_grp_queue);
146 
147 	/*
148 	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
149 	 * grabbing the lck_grp_lock before it is initialized.
150 	 */
151 
152 	bzero(&LockCompatGroup, sizeof(lck_grp_t));
153 	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
154 
155 	LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
156 
157 	if (LcksOpts & enaLkStat) {
158 		LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_STAT;
159 	}
160 	if (LcksOpts & enaLkTimeStat) {
161 		LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
162 	}
163 
164 	os_ref_init(&LockCompatGroup.lck_grp_refcnt, NULL);
165 
166 	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
167 	lck_grp_cnt = 1;
168 
169 	lck_attr_setdefault(&LockDefaultLckAttr);
170 
171 	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
172 
173 #if DEBUG_RW
174 	rw_lock_init();
175 #endif /* DEBUG_RW */
176 }
177 STARTUP(LOCKS_EARLY, STARTUP_RANK_FIRST, lck_mod_init);
178 
179 /*
180  * Routine:	lck_grp_attr_alloc_init
181  */
182 
183 lck_grp_attr_t  *
184 lck_grp_attr_alloc_init(
185 	void)
186 {
187 	lck_grp_attr_t  *attr;
188 
189 	attr = zalloc(KT_LCK_GRP_ATTR);
190 	lck_grp_attr_setdefault(attr);
191 	return attr;
192 }
193 
194 
195 /*
196  * Routine:	lck_grp_attr_setdefault
197  */
198 
199 void
200 lck_grp_attr_setdefault(
201 	lck_grp_attr_t  *attr)
202 {
203 	if (LcksOpts & enaLkStat) {
204 		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
205 	} else {
206 		attr->grp_attr_val = 0;
207 	}
208 }
209 
210 
211 /*
212  * Routine:     lck_grp_attr_setstat
213  */
214 
215 void
216 lck_grp_attr_setstat(
217 	lck_grp_attr_t  *attr)
218 {
219 #pragma unused(attr)
220 	os_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT, relaxed);
221 }
222 
223 
224 /*
225  * Routine:     lck_grp_attr_free
226  */
227 
228 void
229 lck_grp_attr_free(
230 	lck_grp_attr_t  *attr)
231 {
232 	zfree(KT_LCK_GRP_ATTR, attr);
233 }
234 
235 
236 /*
237  * Routine: lck_grp_alloc_init
238  */
239 
240 lck_grp_t *
241 lck_grp_alloc_init(
242 	const char*     grp_name,
243 	lck_grp_attr_t  *attr)
244 {
245 	lck_grp_t       *grp;
246 
247 	grp = zalloc(KT_LCK_GRP);
248 	lck_grp_init(grp, grp_name, attr);
249 	return grp;
250 }
251 
252 /*
253  * Routine: lck_grp_init
254  */
255 
256 void
257 lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
258 {
259 	/* make sure locking infrastructure has been initialized */
260 	assert(lck_grp_cnt > 0);
261 
262 	bzero((void *)grp, sizeof(lck_grp_t));
263 
264 	(void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
265 
266 	if (attr != LCK_GRP_ATTR_NULL) {
267 		grp->lck_grp_attr = attr->grp_attr_val;
268 	} else {
269 		grp->lck_grp_attr = 0;
270 		if (LcksOpts & enaLkStat) {
271 			grp->lck_grp_attr |= LCK_GRP_ATTR_STAT;
272 		}
273 		if (LcksOpts & enaLkTimeStat) {
274 			grp->lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
275 		}
276 	}
277 
278 	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
279 		lck_grp_stats_t *stats = &grp->lck_grp_stats;
280 
281 #if LOCK_STATS
282 		lck_grp_stat_enable(&stats->lgss_spin_held);
283 		lck_grp_stat_enable(&stats->lgss_spin_miss);
284 #endif /* LOCK_STATS */
285 
286 		lck_grp_stat_enable(&stats->lgss_mtx_held);
287 		lck_grp_stat_enable(&stats->lgss_mtx_miss);
288 		lck_grp_stat_enable(&stats->lgss_mtx_direct_wait);
289 		lck_grp_stat_enable(&stats->lgss_mtx_wait);
290 	}
291 	if (grp->lck_grp_attr & LCK_GRP_ATTR_TIME_STAT) {
292 #if LOCK_STATS
293 		lck_grp_stats_t *stats = &grp->lck_grp_stats;
294 		lck_grp_stat_enable(&stats->lgss_spin_spin);
295 #endif /* LOCK_STATS */
296 	}
297 
298 	os_ref_init(&grp->lck_grp_refcnt, NULL);
299 
300 	lck_mtx_lock(&lck_grp_lock);
301 	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
302 	lck_grp_cnt++;
303 	lck_mtx_unlock(&lck_grp_lock);
304 }
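/*
 * Example usage (illustrative sketch only; my_grp and my_mtx are hypothetical
 * names): a client typically allocates a group once, initializes its locks
 * against that group, and tears everything down in reverse order.
 *
 *	lck_grp_t *my_grp = lck_grp_alloc_init("com.example.mydriver", LCK_GRP_ATTR_NULL);
 *	lck_mtx_t  my_mtx;
 *
 *	lck_mtx_init(&my_mtx, my_grp, LCK_ATTR_NULL);
 *	// ... use my_mtx ...
 *	lck_mtx_destroy(&my_mtx, my_grp);
 *	lck_grp_free(my_grp);
 */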
305 
306 /*
307  * Routine:     lck_grp_free
308  */
309 
310 void
311 lck_grp_free(
312 	lck_grp_t       *grp)
313 {
314 	lck_mtx_lock(&lck_grp_lock);
315 	lck_grp_cnt--;
316 	(void)remque((queue_entry_t)grp);
317 	lck_mtx_unlock(&lck_grp_lock);
318 	lck_grp_deallocate(grp);
319 }
320 
321 
322 /*
323  * Routine:     lck_grp_reference
324  */
325 
326 void
327 lck_grp_reference(
328 	lck_grp_t       *grp)
329 {
330 	os_ref_retain(&grp->lck_grp_refcnt);
331 }
332 
333 
334 /*
335  * Routine:     lck_grp_deallocate
336  */
337 
338 void
339 lck_grp_deallocate(
340 	lck_grp_t       *grp)
341 {
342 	if (os_ref_release(&grp->lck_grp_refcnt) != 0) {
343 		return;
344 	}
345 
346 	zfree(KT_LCK_GRP, grp);
347 }
348 
349 /*
350  * Routine:	lck_grp_lckcnt_incr
351  */
352 
353 void
354 lck_grp_lckcnt_incr(
355 	lck_grp_t       *grp,
356 	lck_type_t      lck_type)
357 {
358 	unsigned int    *lckcnt;
359 
360 	switch (lck_type) {
361 	case LCK_TYPE_SPIN:
362 		lckcnt = &grp->lck_grp_spincnt;
363 		break;
364 	case LCK_TYPE_MTX:
365 		lckcnt = &grp->lck_grp_mtxcnt;
366 		break;
367 	case LCK_TYPE_RW:
368 		lckcnt = &grp->lck_grp_rwcnt;
369 		break;
370 	case LCK_TYPE_TICKET:
371 		lckcnt = &grp->lck_grp_ticketcnt;
372 		break;
373 	default:
374 		return panic("lck_grp_lckcnt_incr(): invalid lock type: %d", lck_type);
375 	}
376 
377 	os_atomic_inc(lckcnt, relaxed);
378 }
379 
380 /*
381  * Routine:	lck_grp_lckcnt_decr
382  */
383 
384 void
385 lck_grp_lckcnt_decr(
386 	lck_grp_t       *grp,
387 	lck_type_t      lck_type)
388 {
389 	unsigned int    *lckcnt;
390 	int             updated;
391 
392 	switch (lck_type) {
393 	case LCK_TYPE_SPIN:
394 		lckcnt = &grp->lck_grp_spincnt;
395 		break;
396 	case LCK_TYPE_MTX:
397 		lckcnt = &grp->lck_grp_mtxcnt;
398 		break;
399 	case LCK_TYPE_RW:
400 		lckcnt = &grp->lck_grp_rwcnt;
401 		break;
402 	case LCK_TYPE_TICKET:
403 		lckcnt = &grp->lck_grp_ticketcnt;
404 		break;
405 	default:
406 		panic("lck_grp_lckcnt_decr(): invalid lock type: %d", lck_type);
407 		return;
408 	}
409 
410 	updated = os_atomic_dec(lckcnt, relaxed);
411 	assert(updated >= 0);
412 }
413 
414 /*
415  * Routine:	lck_attr_alloc_init
416  */
417 
418 lck_attr_t *
419 lck_attr_alloc_init(
420 	void)
421 {
422 	lck_attr_t      *attr;
423 
424 	attr = zalloc(KT_LCK_ATTR);
425 	lck_attr_setdefault(attr);
426 	return attr;
427 }
428 
429 
430 /*
431  * Routine:	lck_attr_setdefault
432  */
433 
434 void
435 lck_attr_setdefault(
436 	lck_attr_t      *attr)
437 {
438 #if __arm__ || __arm64__
439 	/* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
440 	attr->lck_attr_val =  LCK_ATTR_NONE;
441 #elif __i386__ || __x86_64__
442 #if     !DEBUG
443 	if (LcksOpts & enaLkDeb) {
444 		attr->lck_attr_val =  LCK_ATTR_DEBUG;
445 	} else {
446 		attr->lck_attr_val =  LCK_ATTR_NONE;
447 	}
448 #else
449 	attr->lck_attr_val =  LCK_ATTR_DEBUG;
450 #endif  /* !DEBUG */
451 #else
452 #error Unknown architecture.
453 #endif  /* __arm__ */
454 }
455 
456 
457 /*
458  * Routine:	lck_attr_setdebug
459  */
460 void
461 lck_attr_setdebug(
462 	lck_attr_t      *attr)
463 {
464 	os_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
465 }
466 
467 /*
468  * Routine:	lck_attr_setdebug
469  */
470 void
471 lck_attr_cleardebug(
472 	lck_attr_t      *attr)
473 {
474 	os_atomic_andnot(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
475 }
476 
477 
478 /*
479  * Routine:	lck_attr_rw_shared_priority
480  */
481 void
482 lck_attr_rw_shared_priority(
483 	lck_attr_t      *attr)
484 {
485 	os_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY, relaxed);
486 }
487 
488 
489 /*
490  * Routine:	lck_attr_free
491  */
492 void
493 lck_attr_free(
494 	lck_attr_t      *attr)
495 {
496 	zfree(KT_LCK_ATTR, attr);
497 }
498 
499 static __abortlike void
500 __lck_require_preemption_disabled_panic(void *lock)
501 {
502 	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
503 }
504 
505 static inline void
506 __lck_require_preemption_disabled(void *lock, thread_t self __unused)
507 {
508 	if (__improbable(!lock_preemption_disabled_for_thread(self))) {
509 		__lck_require_preemption_disabled_panic(lock);
510 	}
511 }
512 
513 /*
514  * Routine:	hw_lock_init
515  *
516  *	Initialize a hardware lock.
517  */
518 MARK_AS_HIBERNATE_TEXT void
519 hw_lock_init(hw_lock_t lock)
520 {
521 	ordered_store_hw(lock, 0);
522 }
523 
524 __result_use_check
525 static inline bool
526 hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
527 {
528 #if OS_ATOMIC_USE_LLSC
529 	uintptr_t oldval;
530 	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
531 		if (oldval != 0) {
532 		        wait_for_event(); // clears the monitor so we don't need give_up()
533 		        return false;
534 		}
535 	});
536 	return true;
537 #else // !OS_ATOMIC_USE_LLSC
538 #if OS_ATOMIC_HAS_LLSC
539 	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
540 	if (oldval != 0) {
541 		wait_for_event(); // clears the monitor so we don't need give_up()
542 		return false;
543 	}
544 #elif LOCK_PRETEST
545 	if (ordered_load_hw(lock) != 0) {
546 		return false;
547 	}
548 #endif
549 	return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
550 #endif // !OS_ATOMIC_USE_LLSC
551 }
552 
553 __attribute__((always_inline))
554 void
555 lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
556 {
557 #if DEBUG || DEVELOPMENT
558 	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
559 #else
560 	(void)owner;
561 #endif
562 }
563 
564 lck_spinlock_to_info_t
565 lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
566 {
567 	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);
568 
569 	/* strip possible bits used by the lock implementations */
570 	owner &= ~0x7ul;
571 
572 	lsti->lock = lck;
573 	lsti->owner_thread_cur = owner;
574 	lsti->owner_cpu = ~0u;
575 	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
576 
577 	if (owner == 0) {
578 		/* if the owner isn't known, just bail */
579 		goto out;
580 	}
581 
582 	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
583 		cpu_data_t *data = cpu_datap(i);
584 		if (data && (uintptr_t)data->cpu_active_thread == owner) {
585 			lsti->owner_cpu = i;
586 			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
587 #if __x86_64__
588 			if ((uint32_t)cpu_number() != i) {
589 				/* Cause NMI and panic on the owner's cpu */
590 				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
591 			}
592 #endif
593 			break;
594 		}
595 	}
596 
597 out:
598 	return lsti;
599 }
600 
601 /*
602  * Routine:	hw_lock_trylock_mask_allow_invalid
603  *
604  *	Tries to acquire a lock of possibly even unmapped memory.
605  *	It assumes a valid lock MUST have another bit set (different from
606  *	the one being set to lock).
607  */
608 __result_use_check
609 extern hw_lock_status_t
610 hw_lock_trylock_mask_allow_invalid(uint32_t *lock, uint32_t mask);
611 
612 __result_use_check
613 static inline bool
614 hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
615 {
616 	uint32_t mask = 1u << bit;
617 
618 #if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
619 	uint32_t oldval, newval;
620 	os_atomic_rmw_loop(target, oldval, newval, acquire, {
621 		newval = oldval | mask;
622 		if (__improbable(oldval & mask)) {
623 #if OS_ATOMIC_HAS_LLSC
624 		        if (wait) {
625 		                wait_for_event(); // clears the monitor so we don't need give_up()
626 			} else {
627 		                os_atomic_clear_exclusive();
628 			}
629 #else
630 		        if (wait) {
631 		                cpu_pause();
632 			}
633 #endif
634 		        return false;
635 		}
636 	});
637 	return true;
638 #else
639 	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
640 	if (__improbable(oldval & mask)) {
641 		if (wait) {
642 			wait_for_event(); // clears the monitor so we don't need give_up()
643 		} else {
644 			os_atomic_clear_exclusive();
645 		}
646 		return false;
647 	}
648 	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
649 #endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
650 }
651 
652 static hw_lock_timeout_status_t
653 hw_lock_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
654 {
655 #pragma unused(interrupt_time)
656 
657 	hw_lock_t lock  = _lock;
658 	uintptr_t owner = lock->lock_data & ~0x7ul;
659 	lck_spinlock_to_info_t lsti;
660 
661 	if (!spinlock_timeout_panic) {
662 		/* keep spinning rather than panicking */
663 		return HW_LOCK_TIMEOUT_CONTINUE;
664 	}
665 
666 	if (pmap_in_ppl()) {
667 		/*
668 		 * This code is used by the PPL and can't write to globals.
669 		 */
670 		panic("Spinlock[%p] timeout after %llu ticks; "
671 		    "current owner: %p, "
672 		    "start time: %llu, now: %llu, timeout: %llu",
673 		    lock, now - start, (void *)owner,
674 		    start, now, timeout);
675 	}
676 
677 	// Capture the actual time spent blocked, which may be higher than the timeout
678 	// if a misbehaving interrupt stole this thread's CPU time.
679 	lsti = lck_spinlock_timeout_hit(lock, owner);
680 	panic("Spinlock[%p] timeout after %llu ticks; "
681 	    "current owner: %p (on cpu %d), "
682 #if DEBUG || DEVELOPMENT
683 	    "initial owner: %p, "
684 #endif /* DEBUG || DEVELOPMENT */
685 #if INTERRUPT_MASKED_DEBUG
686 	    "interrupt time: %llu, "
687 #endif /* INTERRUPT_MASKED_DEBUG */
688 	    "start time: %llu, now: %llu, timeout: %llu",
689 	    lock, now - start,
690 	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
691 #if DEBUG || DEVELOPMENT
692 	    (void *)lsti->owner_thread_orig,
693 #endif /* DEBUG || DEVELOPMENT */
694 #if INTERRUPT_MASKED_DEBUG
695 	    interrupt_time,
696 #endif /* INTERRUPT_MASKED_DEBUG */
697 	    start, now, timeout);
698 }
699 
700 static hw_lock_timeout_status_t
701 hw_lock_bit_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
702 {
703 #pragma unused(interrupt_time)
704 
705 	hw_lock_t lock  = _lock;
706 	uintptr_t state = lock->lock_data;
707 
708 	if (!spinlock_timeout_panic) {
709 		/* keep spinning rather than panicking */
710 		return HW_LOCK_TIMEOUT_CONTINUE;
711 	}
712 
713 	panic("Spinlock[%p] timeout after %llu ticks; "
714 	    "current state: %p, "
715 #if INTERRUPT_MASKED_DEBUG
716 	    "interrupt time: %llu, "
717 #endif /* INTERRUPT_MASKED_DEBUG */
718 	    "start time: %llu, now: %llu, timeout: %llu",
719 	    lock, now - start, (void*) state,
720 #if INTERRUPT_MASKED_DEBUG
721 	    interrupt_time,
722 #endif /* INTERRUPT_MASKED_DEBUG */
723 	    start, now, timeout);
724 }
725 
726 /*
727  *	Routine: hw_lock_lock_contended
728  *
729  *	Spin until lock is acquired or timeout expires.
730  *	timeout is in mach_absolute_time ticks. Called with
731  *	preemption disabled.
732  */
733 static hw_lock_status_t NOINLINE
734 hw_lock_lock_contended(hw_lock_t lock, thread_t thread, uintptr_t data, uint64_t timeout,
735     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
736 {
737 #pragma unused(thread)
738 
739 	uint64_t        end = 0, start = 0, interrupts = 0;
740 	uint64_t        default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
741 	bool            has_timeout = timeout > 0 || default_timeout > 0;
742 
743 #if CONFIG_DTRACE || LOCK_STATS
744 	uint64_t begin = 0;
745 	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
746 
747 	if (__improbable(stat_enabled)) {
748 		begin = mach_absolute_time();
749 	}
750 #endif /* CONFIG_DTRACE || LOCK_STATS */
751 
752 	if (!pmap_in_ppl()) {
753 		/*
754 		 * This code is also used by the PPL, which can't write to globals, so only record the original owner when not in the PPL.
755 		 */
756 		lck_spinlock_timeout_set_orig_owner(lock->lock_data);
757 	}
758 	if (has_timeout && timeout == 0) {
759 		timeout = default_timeout;
760 	}
761 #if INTERRUPT_MASKED_DEBUG
762 	bool measure_interrupts = !pmap_in_ppl() && ml_get_interrupts_enabled();
763 	uint64_t start_interrupts = 0;
764 #endif /* INTERRUPT_MASKED_DEBUG */
765 	for (;;) {
766 		for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
767 			cpu_pause();
768 			if (hw_lock_trylock_contended(lock, data)) {
769 #if CONFIG_DTRACE || LOCK_STATS
770 				if (__improbable(stat_enabled)) {
771 					lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
772 					    mach_absolute_time() - begin);
773 				}
774 				lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
775 				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
776 #endif /* CONFIG_DTRACE || LOCK_STATS */
777 				return HW_LOCK_ACQUIRED;
778 			}
779 		}
780 		if (has_timeout) {
781 			uint64_t now = ml_get_timebase();
782 			if (end == 0) {
783 #if INTERRUPT_MASKED_DEBUG
784 				if (measure_interrupts) {
785 					start_interrupts = thread->machine.int_time_mt;
786 				}
787 #endif /* INTERRUPT_MASKED_DEBUG */
788 				start = now;
789 				end = now + timeout;
790 			} else if (now < end) {
791 				/* keep spinning */
792 			} else {
793 #if INTERRUPT_MASKED_DEBUG
794 				if (measure_interrupts) {
795 					interrupts = thread->machine.int_time_mt - start_interrupts;
796 				}
797 #endif /* INTERRUPT_MASKED_DEBUG */
798 				if (handler(lock, timeout, start, now, interrupts)) {
799 					/* push the deadline */
800 					end += timeout;
801 				} else {
802 #if CONFIG_DTRACE || LOCK_STATS
803 					if (__improbable(stat_enabled)) {
804 						lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
805 						    mach_absolute_time() - begin);
806 					}
807 					lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
808 #endif /* CONFIG_DTRACE || LOCK_STATS */
809 					return HW_LOCK_CONTENDED;
810 				}
811 			}
812 		}
813 	}
814 }
815 
816 void *
817 hw_wait_while_equals(void **address, void *current)
818 {
819 	void *v;
820 	uint64_t end = 0;
821 	uint64_t timeout = os_atomic_load(&lock_panic_timeout, relaxed);
822 
823 #if INTERRUPT_MASKED_DEBUG
824 	bool measure_interrupts = !pmap_in_ppl() && ml_get_interrupts_enabled();
825 	thread_t thread = current_thread();
826 	uint64_t interrupts = 0;
827 	uint64_t start_interrupts = 0;
828 #endif /* INTERRUPT_MASKED_DEBUG */
829 
830 	for (;;) {
831 		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
832 			cpu_pause();
833 #if OS_ATOMIC_HAS_LLSC
834 			v = os_atomic_load_exclusive(address, relaxed);
835 			if (__probable(v != current)) {
836 				os_atomic_clear_exclusive();
837 				return v;
838 			}
839 			wait_for_event();
840 #else
841 			v = os_atomic_load(address, relaxed);
842 			if (__probable(v != current)) {
843 				return v;
844 			}
845 #endif // OS_ATOMIC_HAS_LLSC
846 		}
847 		if (timeout > 0) {
848 			if (end == 0) {
849 				end = ml_get_timebase() + timeout;
850 #if INTERRUPT_MASKED_DEBUG
851 				if (measure_interrupts) {
852 					start_interrupts = thread->machine.int_time_mt;
853 				}
854 #endif /* INTERRUPT_MASKED_DEBUG */
855 			} else if (ml_get_timebase() >= end) {
856 #if INTERRUPT_MASKED_DEBUG
857 				if (measure_interrupts) {
858 					interrupts = thread->machine.int_time_mt - start_interrupts;
859 					panic("Wait while equals timeout @ *%p == %p, interrupt_time %llu", address, v, interrupts);
860 				}
861 #endif /* INTERRUPT_MASKED_DEBUG */
862 				panic("Wait while equals timeout @ *%p == %p", address, v);
863 			}
864 		}
865 	}
866 }
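/*
 * Example usage (illustrative sketch only; this is an assumed pattern, and
 * `slot` and `SENTINEL` are hypothetical names): a caller that keeps reading a
 * sentinel value can spin, bounded by the panic timeout above, until another
 * CPU publishes a different pointer.
 *
 *	void *val = os_atomic_load(&slot, relaxed);
 *	while (val == SENTINEL) {
 *		val = hw_wait_while_equals(&slot, SENTINEL);
 *	}
 */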
867 
868 __result_use_check
869 static inline hw_lock_status_t
870 hw_lock_to_internal(hw_lock_t lock, thread_t thread, uint64_t timeout,
871     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
872 {
873 	uintptr_t state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
874 
875 	if (__probable(hw_lock_trylock_contended(lock, state))) {
876 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
877 		return HW_LOCK_ACQUIRED;
878 	}
879 
880 	return hw_lock_lock_contended(lock, thread, state, timeout, handler LCK_GRP_ARG(grp));
881 }
882 
883 /*
884  *	Routine: hw_lock_lock
885  *
886  *	Acquire lock, spinning until it becomes available,
887  *	return with preemption disabled.
888  */
889 void
890 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
891 {
892 	thread_t thread = current_thread();
893 	lock_disable_preemption_for_thread(thread);
894 	(void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
895 }
896 
897 /*
898  *	Routine: hw_lock_lock_nopreempt
899  *
900  *	Acquire lock, spinning until it becomes available.
901  */
902 void
903 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
904 {
905 	thread_t thread = current_thread();
906 	__lck_require_preemption_disabled(lock, thread);
907 	(void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
908 }
909 
910 /*
911  *	Routine: hw_lock_to
912  *
913  *	Acquire lock, spinning until it becomes available or timeout.
914  *	Timeout is in mach_absolute_time ticks, return with
915  *	preemption disabled.
916  */
917 unsigned
918 int
919 (hw_lock_to)(hw_lock_t lock, uint64_t timeout, hw_lock_timeout_handler_t handler
920     LCK_GRP_ARG(lck_grp_t *grp))
921 {
922 	thread_t thread = current_thread();
923 	lock_disable_preemption_for_thread(thread);
924 	return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
925 }
926 
927 /*
928  *	Routine: hw_lock_to_nopreempt
929  *
930  *	Acquire lock, spinning until it becomes available or timeout.
931  *	Timeout is in mach_absolute_time ticks, called and return with
932  *	preemption disabled.
933  */
934 unsigned
935 int
936 (hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout,
937     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
938 {
939 	thread_t thread = current_thread();
940 	__lck_require_preemption_disabled(lock, thread);
941 	return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
942 }
943 
944 __result_use_check
945 static inline unsigned int
946 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
947 {
948 	int success = 0;
949 
950 #if LOCK_PRETEST
951 	if (__improbable(ordered_load_hw(lock) != 0)) {
952 		return 0;
953 	}
954 #endif  // LOCK_PRETEST
955 
956 	success = os_atomic_cmpxchg(&lock->lock_data, 0,
957 	    LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);
958 
959 	if (success) {
960 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
961 	}
962 	return success;
963 }
964 
965 /*
966  *	Routine: hw_lock_try
967  *
968  *	returns with preemption disabled on success.
969  */
970 unsigned
971 int
972 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
973 {
974 	thread_t thread = current_thread();
975 	lock_disable_preemption_for_thread(thread);
976 	unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
977 	if (!success) {
978 		lock_enable_preemption();
979 	}
980 	return success;
981 }
982 
983 unsigned
984 int
985 (hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
986 {
987 	thread_t thread = current_thread();
988 	__lck_require_preemption_disabled(lock, thread);
989 	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
990 }
991 
992 /*
993  *	Routine: hw_lock_unlock
994  *
995  *	Unconditionally release lock, release preemption level.
996  */
997 static inline void
998 hw_lock_unlock_internal(hw_lock_t lock)
999 {
1000 	os_atomic_store(&lock->lock_data, 0, release);
1001 #if __arm__ || __arm64__
1002 	// ARM tests are only for open-source exclusion
1003 	set_event();
1004 #endif  // __arm__ || __arm64__
1005 #if     CONFIG_DTRACE
1006 	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
1007 #endif /* CONFIG_DTRACE */
1008 }
1009 
1010 void
1011 (hw_lock_unlock)(hw_lock_t lock)
1012 {
1013 	hw_lock_unlock_internal(lock);
1014 	lock_enable_preemption();
1015 }
1016 
1017 void
1018 (hw_lock_unlock_nopreempt)(hw_lock_t lock)
1019 {
1020 	hw_lock_unlock_internal(lock);
1021 }
1022 
1023 /*
1024  *	Routine hw_lock_held, doesn't change preemption state.
1025  *	N.B.  Racy, of course.
1026  */
1027 unsigned int
1028 hw_lock_held(hw_lock_t lock)
1029 {
1030 	return ordered_load_hw(lock) != 0;
1031 }
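/*
 * Example usage (illustrative sketch only; my_hw_lock and my_grp are
 * hypothetical names, and the lock-group argument is only relevant on
 * configurations built with lock-group statistics): the hw_lock API pairs an
 * acquire that disables preemption with a release that re-enables it.
 *
 *	hw_lock_data_t my_hw_lock;
 *
 *	hw_lock_init(&my_hw_lock);
 *	...
 *	hw_lock_lock(&my_hw_lock, my_grp);	// returns with preemption disabled
 *	// touch the state protected by my_hw_lock
 *	hw_lock_unlock(&my_hw_lock);		// releases and re-enables preemption
 */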
1032 
1033 static hw_lock_status_t NOINLINE
1034 hw_lock_bit_to_contended(
1035 	hw_lock_bit_t *lock,
1036 	uint32_t       bit,
1037 	uint64_t       timeout,
1038 	hw_lock_timeout_handler_t handler,
1039 	bool           validate
1040 	LCK_GRP_ARG(lck_grp_t *grp))
1041 {
1042 	uint64_t        end = 0, start = 0, interrupts = 0;
1043 	uint64_t        default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
1044 	bool            has_timeout = timeout > 0 || default_timeout > 0;
1045 
1046 	hw_lock_status_t rc;
1047 	uint32_t        mask = 1u << bit;
1048 
1049 #if CONFIG_DTRACE || LOCK_STATS
1050 	uint64_t begin = 0;
1051 	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
1052 
1053 	if (__improbable(stat_enabled)) {
1054 		begin = mach_absolute_time();
1055 	}
1056 #endif /* LOCK_STATS || CONFIG_DTRACE */
1057 
1058 	if (has_timeout && timeout == 0) {
1059 		timeout = default_timeout;
1060 	}
1061 #if INTERRUPT_MASKED_DEBUG
1062 	bool measure_interrupts = !pmap_in_ppl() && ml_get_interrupts_enabled();
1063 	thread_t thread = current_thread();
1064 	uint64_t start_interrupts = 0;
1065 #endif /* INTERRUPT_MASKED_DEBUG */
1066 	for (;;) {
1067 		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
1068 			// Always load-exclusive before wfe
1069 			// This grabs the monitor and wakes up on a release event
1070 			if (validate) {
1071 				rc = hw_lock_trylock_mask_allow_invalid(lock, mask);
1072 				if (rc == HW_LOCK_INVALID) {
1073 					lock_enable_preemption();
1074 					return rc;
1075 				}
1076 			} else {
1077 				rc = hw_lock_trylock_bit(lock, bit, true);
1078 			}
1079 			if (rc == HW_LOCK_ACQUIRED) {
1080 				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
1081 				goto end;
1082 			}
1083 		}
1084 		if (has_timeout) {
1085 			uint64_t now = ml_get_timebase();
1086 			if (end == 0) {
1087 #if INTERRUPT_MASKED_DEBUG
1088 				if (measure_interrupts) {
1089 					start_interrupts = thread->machine.int_time_mt;
1090 				}
1091 #endif /* INTERRUPT_MASKED_DEBUG */
1092 				start = now;
1093 				end = now + timeout;
1094 			} else if (now < end) {
1095 				/* keep spinning */
1096 			} else {
1097 #if INTERRUPT_MASKED_DEBUG
1098 				if (measure_interrupts) {
1099 					interrupts = thread->machine.int_time_mt - start_interrupts;
1100 				}
1101 #endif /* INTERRUPT_MASKED_DEBUG */
1102 				if (handler(lock, timeout, start, now, interrupts)) {
1103 					/* push the deadline */
1104 					end += timeout;
1105 				} else {
1106 					assert(rc == HW_LOCK_CONTENDED);
1107 					break;
1108 				}
1109 			}
1110 		}
1111 	}
1112 
1113 end:
1114 #if CONFIG_DTRACE || LOCK_STATS
1115 	if (__improbable(stat_enabled)) {
1116 		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
1117 		    mach_absolute_time() - begin);
1118 	}
1119 	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
1120 #endif /* CONFIG_DTRACE || LCK_GRP_STAT */
1121 	return rc;
1122 }
1123 
1124 __result_use_check
1125 static inline unsigned int
1126 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint64_t timeout,
1127     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
1128 {
1129 	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
1130 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
1131 		return HW_LOCK_ACQUIRED;
1132 	}
1133 
1134 	return (unsigned)hw_lock_bit_to_contended(lock, bit, timeout, handler,
1135 	           false LCK_GRP_ARG(grp));
1136 }
1137 
1138 unsigned
1139 int
1140 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint64_t timeout,
1141     hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
1142 {
1143 	_disable_preemption();
1144 	return hw_lock_bit_to_internal(lock, bit, timeout, handler LCK_GRP_ARG(grp));
1145 }
1146 
1147 void
1148 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
1149 {
1150 	_disable_preemption();
1151 	(void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
1152 }
1153 
1154 void
1155 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
1156 {
1157 	__lck_require_preemption_disabled(lock, current_thread());
1158 	(void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
1159 }
1160 
1161 
1162 hw_lock_status_t
1163 (hw_lock_bit_to_allow_invalid)(hw_lock_bit_t * lock, unsigned int bit,
1164     uint64_t timeout, hw_lock_timeout_handler_t handler
1165     LCK_GRP_ARG(lck_grp_t *grp))
1166 {
1167 	int rc;
1168 
1169 	_disable_preemption();
1170 
1171 	rc = hw_lock_trylock_mask_allow_invalid(lock, 1u << bit);
1172 	if (__probable(rc == HW_LOCK_ACQUIRED)) {
1173 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
1174 		return HW_LOCK_ACQUIRED;
1175 	}
1176 
1177 	if (__probable(rc == HW_LOCK_CONTENDED)) {
1178 		return hw_lock_bit_to_contended(lock, bit, timeout, handler,
1179 		           true LCK_GRP_ARG(grp));
1180 	}
1181 
1182 	lock_enable_preemption();
1183 	return HW_LOCK_INVALID;
1184 }
1185 
1186 unsigned
1187 int
1188 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
1189 {
1190 	boolean_t success = false;
1191 
1192 	_disable_preemption();
1193 	success = hw_lock_trylock_bit(lock, bit, false);
1194 	if (!success) {
1195 		lock_enable_preemption();
1196 	}
1197 
1198 	if (success) {
1199 		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
1200 	}
1201 
1202 	return success;
1203 }
1204 
1205 static inline void
1206 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
1207 {
1208 	os_atomic_andnot(lock, 1u << bit, release);
1209 #if __arm__
1210 	set_event();
1211 #endif
1212 #if CONFIG_DTRACE
1213 	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
1214 #endif
1215 }
1216 
1217 /*
1218  *	Routine:	hw_unlock_bit
1219  *
1220  *		Release spin-lock. The second parameter is the bit number to test and set.
1221  *		Decrement the preemption level.
1222  */
1223 void
1224 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
1225 {
1226 	hw_unlock_bit_internal(lock, bit);
1227 	lock_enable_preemption();
1228 }
1229 
1230 void
1231 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
1232 {
1233 	__lck_require_preemption_disabled(lock, current_thread());
1234 	hw_unlock_bit_internal(lock, bit);
1235 }
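/*
 * Example usage (illustrative sketch only; MY_LOCK_BIT, my_word and my_grp are
 * hypothetical names, and the group argument again only applies when lock
 * statistics are configured): the bit-lock routines treat a single bit of a
 * 32-bit word as a spinlock so the remaining bits can carry state.
 *
 *	#define MY_LOCK_BIT	0
 *	hw_lock_bit_t my_word;		// bit 0 is the lock, other bits are state
 *
 *	hw_lock_bit(&my_word, MY_LOCK_BIT, my_grp);	// preemption disabled on return
 *	// update the state bits guarded by MY_LOCK_BIT
 *	hw_unlock_bit(&my_word, MY_LOCK_BIT);		// releases, re-enables preemption
 */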
1236 
1237 /*
1238  * Routine:	lck_spin_sleep
1239  */
1240 wait_result_t
1241 lck_spin_sleep_grp(
1242 	lck_spin_t              *lck,
1243 	lck_sleep_action_t      lck_sleep_action,
1244 	event_t                 event,
1245 	wait_interrupt_t        interruptible,
1246 	lck_grp_t               *grp)
1247 {
1248 	wait_result_t   res;
1249 
1250 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1251 		panic("Invalid lock sleep action %x", lck_sleep_action);
1252 	}
1253 
1254 	res = assert_wait(event, interruptible);
1255 	if (res == THREAD_WAITING) {
1256 		lck_spin_unlock(lck);
1257 		res = thread_block(THREAD_CONTINUE_NULL);
1258 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1259 			lck_spin_lock_grp(lck, grp);
1260 		}
1261 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1262 		lck_spin_unlock(lck);
1263 	}
1264 
1265 	return res;
1266 }
1267 
1268 wait_result_t
1269 lck_spin_sleep(
1270 	lck_spin_t              *lck,
1271 	lck_sleep_action_t      lck_sleep_action,
1272 	event_t                 event,
1273 	wait_interrupt_t        interruptible)
1274 {
1275 	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
1276 }
1277 
1278 /*
1279  * Routine:	lck_spin_sleep_deadline
1280  */
1281 wait_result_t
1282 lck_spin_sleep_deadline(
1283 	lck_spin_t              *lck,
1284 	lck_sleep_action_t      lck_sleep_action,
1285 	event_t                 event,
1286 	wait_interrupt_t        interruptible,
1287 	uint64_t                deadline)
1288 {
1289 	wait_result_t   res;
1290 
1291 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1292 		panic("Invalid lock sleep action %x", lck_sleep_action);
1293 	}
1294 
1295 	res = assert_wait_deadline(event, interruptible, deadline);
1296 	if (res == THREAD_WAITING) {
1297 		lck_spin_unlock(lck);
1298 		res = thread_block(THREAD_CONTINUE_NULL);
1299 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1300 			lck_spin_lock(lck);
1301 		}
1302 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1303 		lck_spin_unlock(lck);
1304 	}
1305 
1306 	return res;
1307 }
1308 
1309 /*
1310  * Routine:	lck_mtx_sleep
1311  */
1312 wait_result_t
1313 lck_mtx_sleep(
1314 	lck_mtx_t               *lck,
1315 	lck_sleep_action_t      lck_sleep_action,
1316 	event_t                 event,
1317 	wait_interrupt_t        interruptible)
1318 {
1319 	wait_result_t           res;
1320 	thread_pri_floor_t      token;
1321 
1322 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1323 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1324 
1325 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1326 		panic("Invalid lock sleep action %x", lck_sleep_action);
1327 	}
1328 
1329 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1330 		/*
1331 		 * We get a priority floor
1332 		 * during the time that this thread is asleep, so that when it
1333 		 * is re-awakened (and not yet contending on the mutex), it is
1334 		 * runnable at a reasonably high priority.
1335 		 */
1336 		token = thread_priority_floor_start();
1337 	}
1338 
1339 	res = assert_wait(event, interruptible);
1340 	if (res == THREAD_WAITING) {
1341 		lck_mtx_unlock(lck);
1342 		res = thread_block(THREAD_CONTINUE_NULL);
1343 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1344 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1345 				lck_mtx_lock_spin(lck);
1346 			} else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1347 				lck_mtx_lock_spin_always(lck);
1348 			} else {
1349 				lck_mtx_lock(lck);
1350 			}
1351 		}
1352 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1353 		lck_mtx_unlock(lck);
1354 	}
1355 
1356 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1357 		thread_priority_floor_end(&token);
1358 	}
1359 
1360 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1361 
1362 	return res;
1363 }
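/*
 * Example usage (illustrative sketch only; my_lock, my_flag and my_event are
 * hypothetical names): lck_mtx_sleep() supports the usual monitor pattern of
 * dropping the mutex while waiting for a condition and re-taking it before
 * re-checking.
 *
 *	lck_mtx_lock(&my_lock);
 *	while (!my_flag) {
 *		(void) lck_mtx_sleep(&my_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&my_event, THREAD_UNINT);
 *	}
 *	// my_flag is set and my_lock is held again here
 *	lck_mtx_unlock(&my_lock);
 *
 * The waking side sets my_flag under the same mutex and then calls
 * thread_wakeup((event_t)&my_event).
 */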
1364 
1365 
1366 /*
1367  * Routine:	lck_mtx_sleep_deadline
1368  */
1369 wait_result_t
1370 lck_mtx_sleep_deadline(
1371 	lck_mtx_t               *lck,
1372 	lck_sleep_action_t      lck_sleep_action,
1373 	event_t                 event,
1374 	wait_interrupt_t        interruptible,
1375 	uint64_t                deadline)
1376 {
1377 	wait_result_t           res;
1378 	thread_pri_floor_t      token;
1379 
1380 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1381 	    VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1382 
1383 	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1384 		panic("Invalid lock sleep action %x", lck_sleep_action);
1385 	}
1386 
1387 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1388 		/*
1389 		 * See lck_mtx_sleep().
1390 		 */
1391 		token = thread_priority_floor_start();
1392 	}
1393 
1394 	res = assert_wait_deadline(event, interruptible, deadline);
1395 	if (res == THREAD_WAITING) {
1396 		lck_mtx_unlock(lck);
1397 		res = thread_block(THREAD_CONTINUE_NULL);
1398 		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1399 			if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1400 				lck_mtx_lock_spin(lck);
1401 			} else {
1402 				lck_mtx_lock(lck);
1403 			}
1404 		}
1405 	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1406 		lck_mtx_unlock(lck);
1407 	}
1408 
1409 	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1410 		thread_priority_floor_end(&token);
1411 	}
1412 
1413 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1414 
1415 	return res;
1416 }
1417 
1418 /*
1419  * Lock Boosting Invariants:
1420  *
1421  * The lock owner is always promoted to the max priority of all its waiters.
1422  * Max priority is capped at MAXPRI_PROMOTE.
1423  *
1424  * The last waiter is not given a promotion when it wakes up or acquires the lock.
1425  * When the last waiter is waking up, a new contender can always come in and
1426  * steal the lock without having to wait for the last waiter to make forward progress.
1427  */
1428 
1429 /*
1430  * Routine: lck_mtx_lock_wait
1431  *
1432  * Invoked in order to wait on contention.
1433  *
1434  * Called with the interlock locked and
1435  * returns it unlocked.
1436  *
1437  * Always aggressively sets the owning thread to promoted,
1438  * even if it's the same or higher priority
1439  * This prevents it from lowering its own priority while holding a lock
1440  *
1441  * TODO: Come up with a more efficient way to handle same-priority promotions
1442  *      <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1443  */
1444 void
1445 lck_mtx_lock_wait(
1446 	lck_mtx_t                       *lck,
1447 	thread_t                        holder,
1448 	struct turnstile                **ts)
1449 {
1450 	thread_t                thread = current_thread();
1451 	lck_mtx_t               *mutex = lck;
1452 	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1453 
1454 #if     CONFIG_DTRACE
1455 	uint64_t                sleep_start = 0;
1456 
1457 	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1458 		sleep_start = mach_absolute_time();
1459 	}
1460 #endif
1461 
1462 #if LOCKS_INDIRECT_ALLOW
1463 	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1464 		mutex = &lck->lck_mtx_ptr->lck_mtx;
1465 	}
1466 #endif /* LOCKS_INDIRECT_ALLOW */
1467 
1468 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1469 	    trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1470 
1471 	mutex->lck_mtx_waiters++;
1472 
1473 	if (*ts == NULL) {
1474 		*ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1475 	}
1476 
1477 	struct turnstile *turnstile = *ts;
1478 	thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
1479 	turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1480 
1481 	waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
1482 
1483 	lck_mtx_ilk_unlock(mutex);
1484 
1485 	turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
1486 
1487 	thread_block(THREAD_CONTINUE_NULL);
1488 
1489 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1490 #if     CONFIG_DTRACE
1491 	/*
1492 	 * Record the DTrace lockstat probe for blocking, block time
1493 	 * measured from when we were entered.
1494 	 */
1495 	if (sleep_start) {
1496 #if LOCKS_INDIRECT_ALLOW
1497 		if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
1498 			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1499 			    mach_absolute_time() - sleep_start);
1500 		} else
1501 #endif /* LOCKS_INDIRECT_ALLOW */
1502 		{
1503 			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1504 			    mach_absolute_time() - sleep_start);
1505 		}
1506 	}
1507 #endif
1508 }
1509 
1510 /*
1511  * Routine:     lck_mtx_lock_acquire
1512  *
1513  * Invoked on acquiring the mutex when there is
1514  * contention.
1515  *
1516  * Returns the current number of waiters.
1517  *
1518  * Called with the interlock locked.
1519  */
1520 int
1521 lck_mtx_lock_acquire(
1522 	lck_mtx_t               *lck,
1523 	struct turnstile        *ts)
1524 {
1525 	thread_t                thread = current_thread();
1526 	lck_mtx_t               *mutex = lck;
1527 
1528 #if LOCKS_INDIRECT_ALLOW
1529 	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1530 		mutex = &lck->lck_mtx_ptr->lck_mtx;
1531 	}
1532 #endif /* LOCKS_INDIRECT_ALLOW */
1533 
1534 	if (mutex->lck_mtx_waiters > 0) {
1535 		if (ts == NULL) {
1536 			ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1537 		}
1538 
1539 		turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1540 		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1541 	}
1542 
1543 	if (ts != NULL) {
1544 		turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1545 	}
1546 
1547 	return mutex->lck_mtx_waiters;
1548 }
1549 
1550 /*
1551  * Routine:     lck_mtx_unlock_wakeup
1552  *
1553  * Invoked on unlock when there is contention.
1554  *
1555  * Called with the interlock locked.
1556  *
1557  * NOTE: callers should call turnstile_cleanup after
1558  * dropping the interlock.
1559  */
1560 boolean_t
1561 lck_mtx_unlock_wakeup(
1562 	lck_mtx_t                       *lck,
1563 	thread_t                        holder)
1564 {
1565 	thread_t                thread = current_thread();
1566 	lck_mtx_t               *mutex = lck;
1567 	__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1568 	struct turnstile *ts;
1569 	kern_return_t did_wake;
1570 
1571 #if LOCKS_INDIRECT_ALLOW
1572 	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1573 		mutex = &lck->lck_mtx_ptr->lck_mtx;
1574 	}
1575 #endif /* LOCKS_INDIRECT_ALLOW */
1576 
1577 
1578 	if (thread != holder) {
1579 		panic("lck_mtx_unlock_wakeup: mutex %p holder %p", mutex, holder);
1580 	}
1581 
1582 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1583 	    trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1584 
1585 	assert(mutex->lck_mtx_waiters > 0);
1586 
1587 	ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1588 
1589 	if (mutex->lck_mtx_waiters > 1) {
1590 		/* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1591 		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1592 	} else {
1593 		did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1594 		turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1595 	}
1596 	assert(did_wake == KERN_SUCCESS);
1597 
1598 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1599 	turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1600 
1601 	mutex->lck_mtx_waiters--;
1602 
1603 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1604 
1605 	return mutex->lck_mtx_waiters > 0;
1606 }
1607 
1608 /*
1609  * Routine:     mutex_pause
1610  *
1611  * Called by former callers of simple_lock_pause().
1612  */
1613 #define MAX_COLLISION_COUNTS    32
1614 #define MAX_COLLISION   8
1615 
1616 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1617 
1618 uint32_t collision_backoffs[MAX_COLLISION] = {
1619 	10, 50, 100, 200, 400, 600, 800, 1000
1620 };
1621 
1622 
1623 void
1624 mutex_pause(uint32_t collisions)
1625 {
1626 	wait_result_t wait_result;
1627 	uint32_t        back_off;
1628 
1629 	if (collisions >= MAX_COLLISION_COUNTS) {
1630 		collisions = MAX_COLLISION_COUNTS - 1;
1631 	}
1632 	max_collision_count[collisions]++;
1633 
1634 	if (collisions >= MAX_COLLISION) {
1635 		collisions = MAX_COLLISION - 1;
1636 	}
1637 	back_off = collision_backoffs[collisions];
1638 
1639 	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1640 	assert(wait_result == THREAD_WAITING);
1641 
1642 	wait_result = thread_block(THREAD_CONTINUE_NULL);
1643 	assert(wait_result == THREAD_TIMED_OUT);
1644 }
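/*
 * Example usage (illustrative sketch only; this is an assumed pattern and
 * my_lock is a hypothetical name): callers typically invoke mutex_pause() from
 * a try-lock retry loop, passing an increasing collision count so the backoff
 * grows with contention.
 *
 *	uint32_t collisions = 0;
 *	while (!lck_mtx_try_lock(&my_lock)) {
 *		mutex_pause(collisions++);
 *	}
 */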
1645 
1646 
1647 unsigned int mutex_yield_wait = 0;
1648 unsigned int mutex_yield_no_wait = 0;
1649 
1650 void
1651 lck_mtx_yield(
1652 	lck_mtx_t   *lck)
1653 {
1654 	int     waiters;
1655 
1656 #if DEBUG
1657 	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1658 #endif /* DEBUG */
1659 
1660 #if LOCKS_INDIRECT_ALLOW
1661 	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1662 		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1663 	} else
1664 #endif /* LOCKS_INDIRECT_ALLOW */
1665 	{
1666 		waiters = lck->lck_mtx_waiters;
1667 	}
1668 
1669 	if (!waiters) {
1670 		mutex_yield_no_wait++;
1671 	} else {
1672 		mutex_yield_wait++;
1673 		lck_mtx_unlock(lck);
1674 		mutex_pause(0);
1675 		lck_mtx_lock(lck);
1676 	}
1677 }
1678 
1679 kern_return_t
1680 host_lockgroup_info(
1681 	host_t                                  host,
1682 	lockgroup_info_array_t  *lockgroup_infop,
1683 	mach_msg_type_number_t  *lockgroup_infoCntp)
1684 {
1685 	lockgroup_info_t        *lockgroup_info_base;
1686 	lockgroup_info_t        *lockgroup_info;
1687 	vm_offset_t                     lockgroup_info_addr;
1688 	vm_size_t                       lockgroup_info_size;
1689 	vm_size_t                       lockgroup_info_vmsize;
1690 	lck_grp_t                       *lck_grp;
1691 	unsigned int            i;
1692 	vm_map_copy_t           copy;
1693 	kern_return_t           kr;
1694 
1695 	if (host == HOST_NULL) {
1696 		return KERN_INVALID_HOST;
1697 	}
1698 
1699 	lck_mtx_lock(&lck_grp_lock);
1700 
1701 	lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1702 	lockgroup_info_vmsize = round_page(lockgroup_info_size);
1703 	kr = kmem_alloc_pageable(ipc_kernel_map,
1704 	    &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1705 	if (kr != KERN_SUCCESS) {
1706 		lck_mtx_unlock(&lck_grp_lock);
1707 		return kr;
1708 	}
1709 
1710 	lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1711 	lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1712 	lockgroup_info = lockgroup_info_base;
1713 
1714 	for (i = 0; i < lck_grp_cnt; i++) {
1715 		lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1716 		lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1717 		lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1718 
1719 #if LOCK_STATS
1720 		lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stats.lgss_spin_held.lgs_count;
1721 		lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stats.lgss_spin_miss.lgs_count;
1722 #endif /* LOCK_STATS */
1723 
1724 		// Historically on x86, held was used for "direct wait" and util for "held"
1725 		lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stats.lgss_mtx_held.lgs_count;
1726 		lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stats.lgss_mtx_direct_wait.lgs_count;
1727 		lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stats.lgss_mtx_miss.lgs_count;
1728 		lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stats.lgss_mtx_wait.lgs_count;
1729 
1730 		(void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1731 
1732 		lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1733 		lockgroup_info++;
1734 	}
1735 
1736 	*lockgroup_infoCntp = lck_grp_cnt;
1737 	lck_mtx_unlock(&lck_grp_lock);
1738 
1739 	if (lockgroup_info_size != lockgroup_info_vmsize) {
1740 		bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1741 	}
1742 
1743 	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1744 	    (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1745 	assert(kr == KERN_SUCCESS);
1746 
1747 	*lockgroup_infop = (lockgroup_info_t *) copy;
1748 
1749 	return KERN_SUCCESS;
1750 }
1751 
1752 /*
1753  * sleep_with_inheritor and wakeup_with_inheritor KPI
1754  *
1755  * Functions that allow threads to sleep on an event and use a turnstile to propagate the priority of the sleeping threads to
1756  * the latest thread specified as inheritor.
1757  *
1758  * Inheritor management is delegated to the caller: the caller needs to store a thread identifier and pass it to these functions to specify upon whom to
1759  * direct the push. The inheritor cannot run in user space while holding a push from an event. It is therefore the caller's responsibility to call
1760  * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1761  *
1762  * sleep_with_inheritor requires a locking primitive to be held while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1763  *
1764  * Turnstiles require a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation; therefore sleep_with_inheritor, change_sleep_inheritor and
1765  * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
1766  * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
1767  * invoking any turnstile operation.
1768  *
1769  * All functions will save the turnstile associated with the event in the turnstile kernel hash table and will use the turnstile kernel hash table bucket
1770  * spinlock as the turnstile interlock. Because we do not want to keep interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
1771  * is instantiated for this KPI to manage the hash without interrupts disabled.
1772  * Also:
1773  * - all events on the system that hash on the same bucket will contend on the same spinlock.
1774  * - every event will have a dedicated wait_queue.
1775  *
1776  * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1777  * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1778  */
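
/*
 * Illustrative sketch only (not part of this file): one possible consumer of
 * the KPI described above. The struct example_obj, its fields and the
 * example_* functions are hypothetical; the KPI calls are the ones defined
 * below. The waiter pushes its priority onto the current owner while asleep,
 * and the owner transfers that push to the thread it wakes up.
 */
#if 0 /* example only */
struct example_obj {
	lck_mtx_t lock;
	thread_t  owner;        /* current owner of the object, push target */
	bool      ready;
};

static void
example_wait_for_ready(struct example_obj *obj)
{
	lck_mtx_lock(&obj->lock);
	while (!obj->ready) {
		/* sleep on &obj->ready, pushing on obj->owner while blocked */
		lck_mtx_sleep_with_inheritor(&obj->lock, LCK_SLEEP_DEFAULT,
		    (event_t)&obj->ready, obj->owner,
		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	}
	lck_mtx_unlock(&obj->lock);
}

static void
example_mark_ready(struct example_obj *obj)
{
	thread_t woken = NULL;

	lck_mtx_lock(&obj->lock);
	obj->ready = true;
	/* wake the highest priority waiter and hand it the push */
	if (wakeup_one_with_inheritor((event_t)&obj->ready, THREAD_AWAKENED,
	    LCK_WAKE_DEFAULT, &woken) == KERN_SUCCESS) {
		thread_deallocate(woken);       /* drop the returned reference */
	}
	lck_mtx_unlock(&obj->lock);
}
#endif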
1779 
1780 kern_return_t
1781 wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
1782 {
1783 	uint32_t index;
1784 	struct turnstile *ts = NULL;
1785 	kern_return_t ret = KERN_NOT_WAITING;
1786 	int priority;
1787 	thread_t wokeup;
1788 
1789 	/*
1790 	 * the hash bucket spinlock is used as turnstile interlock
1791 	 */
1792 	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1793 
1794 	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1795 
1796 	if (wake_one) {
1797 		if (action == LCK_WAKE_DEFAULT) {
1798 			priority = WAITQ_PROMOTE_ON_WAKE;
1799 		} else {
1800 			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
1801 			priority = WAITQ_ALL_PRIORITIES;
1802 		}
1803 
1804 		/*
1805 		 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
1806 		 * if it finds a thread
1807 		 */
1808 		wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
1809 		if (wokeup != NULL) {
1810 			if (thread_wokenup != NULL) {
1811 				*thread_wokenup = wokeup;
1812 			} else {
1813 				thread_deallocate_safe(wokeup);
1814 			}
1815 			ret = KERN_SUCCESS;
1816 			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
1817 				goto complete;
1818 			}
1819 		} else {
1820 			if (thread_wokenup != NULL) {
1821 				*thread_wokenup = NULL;
1822 			}
1823 			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1824 			ret = KERN_NOT_WAITING;
1825 		}
1826 	} else {
1827 		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
1828 		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1829 	}
1830 
1831 	/*
1832 	 * turnstile_update_inheritor_complete could be called while holding the interlock.
1833 	 * In this case the new inheritor is either NULL, or a thread that has just been woken up
1834 	 * and has not blocked yet because it is racing with the same interlock used here
1835 	 * after the wait.
1836 	 * So there is no chain to update for the new inheritor.
1837 	 *
1838 	 * However, unless the current thread is the old inheritor,
1839 	 * the old inheritor can be blocked and requires a chain update.
1840 	 *
1841 	 * The chain should be short because kernel turnstiles cannot have user turnstiles
1842 	 * chained after them.
1843 	 *
1844 	 * We could optimize this by asking the turnstile to tell us
1845 	 * whether the old inheritor needs an update, and drop the lock
1846 	 * only in that case.
1847 	 */
1848 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1849 
1850 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1851 
1852 	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1853 
1854 complete:
1855 	turnstile_complete((uintptr_t)event, NULL, NULL, type);
1856 
1857 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1858 
1859 	turnstile_cleanup();
1860 
1861 	return ret;
1862 }
1863 
1864 static wait_result_t
1865 sleep_with_inheritor_and_turnstile_type(event_t event,
1866     thread_t inheritor,
1867     wait_interrupt_t interruptible,
1868     uint64_t deadline,
1869     turnstile_type_t type,
1870     void (^primitive_lock)(void),
1871     void (^primitive_unlock)(void))
1872 {
1873 	wait_result_t ret;
1874 	uint32_t index;
1875 	struct turnstile *ts = NULL;
1876 
1877 	/*
1878 	 * the hash bucket spinlock is used as turnstile interlock,
1879 	 * lock it before releasing the primitive lock
1880 	 */
1881 	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1882 
1883 	primitive_unlock();
1884 
1885 	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1886 
1887 	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
1888 	/*
1889 	 * We need TURNSTILE_DELAYED_UPDATE because we will call
1890 	 * waitq_assert_wait64 after.
1891 	 */
1892 	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1893 
1894 	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);
1895 
1896 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1897 
1898 	/*
1899 	 * Update new and old inheritor chains outside the interlock;
1900 	 */
1901 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1902 
1903 	if (ret == THREAD_WAITING) {
1904 		ret = thread_block(THREAD_CONTINUE_NULL);
1905 	}
1906 
1907 	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1908 
1909 	turnstile_complete((uintptr_t)event, NULL, NULL, type);
1910 
1911 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1912 
1913 	turnstile_cleanup();
1914 
1915 	primitive_lock();
1916 
1917 	return ret;
1918 }
1919 
1920 kern_return_t
1921 change_sleep_inheritor_and_turnstile_type(event_t event,
1922     thread_t inheritor,
1923     turnstile_type_t type)
1924 {
1925 	uint32_t index;
1926 	struct turnstile *ts = NULL;
1927 	kern_return_t ret = KERN_SUCCESS;
1928 	/*
1929 	 * the hash bucket spinlock is used as turnstile interlock
1930 	 */
1931 	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1932 
1933 	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1934 
1935 	if (!turnstile_has_waiters(ts)) {
1936 		ret = KERN_NOT_WAITING;
1937 	}
1938 
1939 	/*
1940 	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1941 	 */
1942 	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1943 
1944 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1945 
1946 	/*
1947 	 * update the chains outside the interlock
1948 	 */
1949 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1950 
1951 	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1952 
1953 	turnstile_complete((uintptr_t)event, NULL, NULL, type);
1954 
1955 	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1956 
1957 	turnstile_cleanup();
1958 
1959 	return ret;
1960 }
1961 
1962 typedef void (^void_block_void)(void);
1963 
1964 /*
1965  * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1966  */
1967 
1968 wait_result_t
1969 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1970 {
1971 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1972 
1973 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1974 		return sleep_with_inheritor_and_turnstile_type(event,
1975 		           inheritor,
1976 		           interruptible,
1977 		           deadline,
1978 		           type,
1979 		           ^{;},
1980 		           ^{lck_mtx_unlock(lock);});
1981 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1982 		return sleep_with_inheritor_and_turnstile_type(event,
1983 		           inheritor,
1984 		           interruptible,
1985 		           deadline,
1986 		           type,
1987 		           ^{lck_mtx_lock_spin(lock);},
1988 		           ^{lck_mtx_unlock(lock);});
1989 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1990 		return sleep_with_inheritor_and_turnstile_type(event,
1991 		           inheritor,
1992 		           interruptible,
1993 		           deadline,
1994 		           type,
1995 		           ^{lck_mtx_lock_spin_always(lock);},
1996 		           ^{lck_mtx_unlock(lock);});
1997 	} else {
1998 		return sleep_with_inheritor_and_turnstile_type(event,
1999 		           inheritor,
2000 		           interruptible,
2001 		           deadline,
2002 		           type,
2003 		           ^{lck_mtx_lock(lock);},
2004 		           ^{lck_mtx_unlock(lock);});
2005 	}
2006 }
2007 
2008 /*
2009  * Name: lck_spin_sleep_with_inheritor
2010  *
2011  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2012  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2013  *              be directed to the inheritor specified.
2014  *              An interruptible mode and deadline can be specified to return earlier from the wait.
2015  *
2016  * Args:
2017  *   Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2018  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
2019  *   Arg3: event to wait on.
2020  *   Arg4: thread to propagate the event push to.
2021  *   Arg5: interruptible flag for wait.
2022  *   Arg6: deadline for wait.
2023  *
2024  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2025  *             Lock will be dropped while waiting.
2026  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2027  *             wakeup for the event is called.
2028  *
2029  * Returns: result of the wait.
2030  */
2031 wait_result_t
2032 lck_spin_sleep_with_inheritor(
2033 	lck_spin_t *lock,
2034 	lck_sleep_action_t lck_sleep_action,
2035 	event_t event,
2036 	thread_t inheritor,
2037 	wait_interrupt_t interruptible,
2038 	uint64_t deadline)
2039 {
2040 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2041 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2042 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2043 		           ^{}, ^{ lck_spin_unlock(lock); });
2044 	} else {
2045 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2046 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2047 		           ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
2048 	}
2049 }
2050 
2051 /*
2052  * Name: lck_ticket_sleep_with_inheritor
2053  *
2054  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2055  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2056  *              be directed to the inheritor specified.
2057  *              An interruptible mode and deadline can be specified to return earlier from the wait.
2058  *
2059  * Args:
2060  *   Arg1: lck_ticket_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2061  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
2062  *   Arg3: event to wait on.
2063  *   Arg4: thread to propagate the event push to.
2064  *   Arg5: interruptible flag for wait.
2065  *   Arg6: deadline for wait.
2066  *
2067  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2068  *             Lock will be dropped while waiting.
2069  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2070  *             wakeup for the event is called.
2071  *
2072  * Returns: result of the wait.
2073  */
2074 wait_result_t
2075 lck_ticket_sleep_with_inheritor(
2076 	lck_ticket_t *lock,
2077 	lck_grp_t *grp,
2078 	lck_sleep_action_t lck_sleep_action,
2079 	event_t event,
2080 	thread_t inheritor,
2081 	wait_interrupt_t interruptible,
2082 	uint64_t deadline)
2083 {
2084 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2085 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2086 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2087 		           ^{}, ^{ lck_ticket_unlock(lock); });
2088 	} else {
2089 		return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2090 		           interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2091 		           ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
2092 	}
2093 }
2094 
2095 /*
2096  * Name: lck_mtx_sleep_with_inheritor
2097  *
2098  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2099  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2100  *              be directed to the inheritor specified.
2101  *              An interruptible mode and deadline can be specified to return earlier from the wait.
2102  *
2103  * Args:
2104  *   Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2105  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
2106  *   Arg3: event to wait on.
2107  *   Arg4: thread to propagate the event push to.
2108  *   Arg5: interruptible flag for wait.
2109  *   Arg6: deadline for wait.
2110  *
2111  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2112  *             Lock will be dropped while waiting.
2113  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2114  *             wakeup for the event is called.
2115  *
2116  * Returns: result of the wait.
2117  */
2118 wait_result_t
2119 lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2120 {
2121 	return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2122 }
2123 
2124 /*
2125  * sleep_with_inheritor functions with lck_rw_t as locking primitive.
2126  */
2127 
2128 wait_result_t
2129 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
2130 {
2131 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2132 
2133 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2134 
2135 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2136 		return sleep_with_inheritor_and_turnstile_type(event,
2137 		           inheritor,
2138 		           interruptible,
2139 		           deadline,
2140 		           type,
2141 		           ^{;},
2142 		           ^{lck_rw_type = lck_rw_done(lock);});
2143 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2144 		return sleep_with_inheritor_and_turnstile_type(event,
2145 		           inheritor,
2146 		           interruptible,
2147 		           deadline,
2148 		           type,
2149 		           ^{lck_rw_lock(lock, lck_rw_type);},
2150 		           ^{lck_rw_type = lck_rw_done(lock);});
2151 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2152 		return sleep_with_inheritor_and_turnstile_type(event,
2153 		           inheritor,
2154 		           interruptible,
2155 		           deadline,
2156 		           type,
2157 		           ^{lck_rw_lock_exclusive(lock);},
2158 		           ^{lck_rw_type = lck_rw_done(lock);});
2159 	} else {
2160 		return sleep_with_inheritor_and_turnstile_type(event,
2161 		           inheritor,
2162 		           interruptible,
2163 		           deadline,
2164 		           type,
2165 		           ^{lck_rw_lock_shared(lock);},
2166 		           ^{lck_rw_type = lck_rw_done(lock);});
2167 	}
2168 }
2169 
2170 /*
2171  * Name: lck_rw_sleep_with_inheritor
2172  *
2173  * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2174  *              While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2175  *              be directed to the inheritor specified.
2176  *              An interruptible mode and deadline can be specified to return earlier from the wait.
2177  *
2178  * Args:
2179  *   Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2180  *   Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2181  *   Arg3: event to wait on.
2182  *   Arg4: thread to propagate the event push to.
2183  *   Arg5: interruptible flag for wait.
2184  *   Arg6: deadline for wait.
2185  *
2186  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2187  *             Lock will be dropped while waiting.
2188  *             The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2189  *             wakeup for the event is called.
2190  *
2191  * Returns: result of the wait.
2192  */
2193 wait_result_t
2194 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2195 {
2196 	return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2197 }
2198 
2199 /*
2200  * wakeup_with_inheritor functions are independent from the locking primitive.
2201  */
2202 
2203 /*
2204  * Name: wakeup_one_with_inheritor
2205  *
2206  * Description: wake up one waiter for event if any. The thread woken up will be the one with the highest sched priority waiting on event.
2207  *              The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
2208  *              If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
2209  *
2210  * Args:
2211  *   Arg1: event to wake from.
2212  *   Arg2: wait result to pass to the woken up thread.
2213  *   Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
2214  *   Arg4: pointer for storing the thread woken up.
2215  *
2216  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2217  *
2218  * Conditions: The new inheritor woken up cannot run in user space until another inheritor is specified for the event or a
2219  *             wakeup for the event is called.
2220  *             A reference for the woken-up thread is acquired.
2221  *             NOTE: this cannot be called from interrupt context.
2222  */
2223 kern_return_t
2224 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
2225 {
2226 	return wakeup_with_inheritor_and_turnstile_type(event,
2227 	           TURNSTILE_SLEEP_INHERITOR,
2228 	           result,
2229 	           TRUE,
2230 	           action,
2231 	           thread_wokenup);
2232 }
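
/*
 * Sketch (hypothetical caller, not part of this file): waking one waiter
 * without transferring the push, e.g. because the caller will name a new
 * inheritor itself. `event` is a placeholder. The thread reference returned
 * through the out parameter must be released by the caller.
 */
#if 0 /* example only */
	thread_t woken = NULL;

	if (wakeup_one_with_inheritor(event, THREAD_AWAKENED,
	    LCK_WAKE_DO_NOT_TRANSFER_PUSH, &woken) == KERN_SUCCESS) {
		/* ... e.g. record `woken` as the next owner ... */
		thread_deallocate(woken);
	}
#endif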
2233 
2234 /*
2235  * Name: wakeup_all_with_inheritor
2236  *
2237  * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
2238  *
2239  * Args:
2240  *   Arg1: event to wake from.
2241  *   Arg2: wait result to pass to the woken up threads.
2242  *
2243  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2244  *
2245  * Conditions: NOTE: this cannot be called from interrupt context.
2246  */
2247 kern_return_t
2248 wakeup_all_with_inheritor(event_t event, wait_result_t result)
2249 {
2250 	return wakeup_with_inheritor_and_turnstile_type(event,
2251 	           TURNSTILE_SLEEP_INHERITOR,
2252 	           result,
2253 	           FALSE,
2254 	           0,
2255 	           NULL);
2256 }
2257 
2258 /*
2259  * change_sleep_inheritor is independent from the locking primitive.
2260  */
2261 
2262 /*
2263  * Name: change_sleep_inheritor
2264  *
2265  * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
2266  *
2267  * Args:
2268  *   Arg1: event to redirect the push.
2269  *   Arg2: new inheritor for event.
2270  *
2271  * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2272  *
2273  * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
2274  *             wakeup for the event is called.
2275  *             NOTE: this cannot be called from interrupt context.
2276  */
2277 kern_return_t
2278 change_sleep_inheritor(event_t event, thread_t inheritor)
2279 {
2280 	return change_sleep_inheritor_and_turnstile_type(event,
2281 	           inheritor,
2282 	           TURNSTILE_SLEEP_INHERITOR);
2283 }
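
/*
 * Sketch (hypothetical caller, not part of this file): redirecting the push
 * of the threads waiting on `event` to `new_owner`, for instance when the
 * guarded resource changes hands without waking anyone up. Both names are
 * placeholders.
 */
#if 0 /* example only */
	if (change_sleep_inheritor(event, new_owner) == KERN_NOT_WAITING) {
		/* nobody was waiting, so new_owner received no push */
	}
#endif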
2284 
2285 void
2286 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
2287 {
2288 	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
2289 	assert(waitq_is_turnstile_queue(waitq));
2290 	waitinfo->owner = 0;
2291 	waitinfo->context = 0;
2292 
2293 	if (waitq_held(waitq)) {
2294 		return;
2295 	}
2296 
2297 	struct turnstile *turnstile = waitq_to_turnstile(waitq);
2298 	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
2299 	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
2300 }
2301 
2302 #define GATE_TYPE        3
2303 #define GATE_ILOCK_BIT   0
2304 #define GATE_WAITERS_BIT 1
2305 
2306 #define GATE_ILOCK (1 << GATE_ILOCK_BIT)
2307 #define GATE_WAITERS (1 << GATE_WAITERS_BIT)
2308 
2309 #define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
2310 #define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
2311 #define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
2312 #define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
2313 #define ordered_store_gate(gate, value)  os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)
2314 
2315 #define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
2316 #define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
2317 #define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
2318 #define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))
2319 
2320 #define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)
2321 
2322 #define GATE_EVENT(gate)     ((event_t) gate)
2323 #define EVENT_TO_GATE(event) ((gate_t *) event)
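
/*
 * Layout of gt_data implied by the macros above: the two low bits carry the
 * interlock and waiters flags, while the remaining bits hold the holder
 * thread_t (thread pointers are aligned, so their low bits are always zero):
 *
 *   +----------------------------+---------+-------+
 *   |    holder thread_t bits    | WAITERS | ILOCK |
 *   +----------------------------+---------+-------+
 *        bits N-1 .. 2              bit 1    bit 0
 */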
2324 
2325 typedef void (*void_func_void)(void);
2326 
2327 __abortlike
2328 static void
2329 gate_verify_tag_panic(gate_t *gate)
2330 {
2331 	panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2332 }
2333 
2334 __abortlike
2335 static void
2336 gate_verify_destroy_panic(gate_t *gate)
2337 {
2338 	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2339 }
2340 
2341 static void
2342 gate_verify(gate_t *gate)
2343 {
2344 	if (gate->gt_type != GATE_TYPE) {
2345 		gate_verify_tag_panic(gate);
2346 	}
2347 	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
2348 		gate_verify_destroy_panic(gate);
2349 	}
2350 
2351 	assert(gate->gt_refs > 0);
2352 }
2353 
2354 __abortlike
2355 static void
2356 gate_already_owned_panic(gate_t *gate, thread_t holder)
2357 {
2358 	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
2359 }
2360 
2361 static kern_return_t
2362 gate_try_close(gate_t *gate)
2363 {
2364 	uintptr_t state;
2365 	thread_t holder;
2366 	kern_return_t ret;
2367 	thread_t thread = current_thread();
2368 
2369 	gate_verify(gate);
2370 
2371 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2372 		return KERN_SUCCESS;
2373 	}
2374 
2375 	gate_ilock(gate);
2376 	state = ordered_load_gate(gate);
2377 	holder = GATE_STATE_TO_THREAD(state);
2378 
2379 	if (holder == NULL) {
2380 		assert(gate_has_waiter_bit(state) == FALSE);
2381 
2382 		state = GATE_THREAD_TO_STATE(current_thread());
2383 		state |= GATE_ILOCK;
2384 		ordered_store_gate(gate, state);
2385 		ret = KERN_SUCCESS;
2386 	} else {
2387 		if (holder == current_thread()) {
2388 			gate_already_owned_panic(gate, holder);
2389 		}
2390 		ret = KERN_FAILURE;
2391 	}
2392 
2393 	gate_iunlock(gate);
2394 	return ret;
2395 }
2396 
2397 static void
2398 gate_close(gate_t* gate)
2399 {
2400 	uintptr_t state;
2401 	thread_t holder;
2402 	thread_t thread = current_thread();
2403 
2404 	gate_verify(gate);
2405 
2406 	if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2407 		return;
2408 	}
2409 
2410 	gate_ilock(gate);
2411 	state = ordered_load_gate(gate);
2412 	holder = GATE_STATE_TO_THREAD(state);
2413 
2414 	if (holder != NULL) {
2415 		gate_already_owned_panic(gate, holder);
2416 	}
2417 
2418 	assert(gate_has_waiter_bit(state) == FALSE);
2419 
2420 	state = GATE_THREAD_TO_STATE(thread);
2421 	state |= GATE_ILOCK;
2422 	ordered_store_gate(gate, state);
2423 
2424 	gate_iunlock(gate);
2425 }
2426 
2427 static void
2428 gate_open_turnstile(gate_t *gate)
2429 {
2430 	struct turnstile *ts = NULL;
2431 
2432 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2433 	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2434 	turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2435 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2436 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2437 	/*
2438 	 * We can do the cleanup while holding the interlock.
2439 	 * It is ok because:
2440 	 * 1. current_thread is the previous inheritor and it is running
2441 	 * 2. new inheritor is NULL.
2442 	 * => No chain of turnstiles needs to be updated.
2443 	 */
2444 	turnstile_cleanup();
2445 }
2446 
2447 __abortlike
2448 static void
2449 gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
2450 {
2451 	if (open) {
2452 		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2453 	} else {
2454 		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
2455 	}
2456 }
2457 
2458 static void
2459 gate_open(gate_t *gate)
2460 {
2461 	uintptr_t state;
2462 	thread_t holder;
2463 	bool waiters;
2464 	thread_t thread = current_thread();
2465 
2466 	gate_verify(gate);
2467 	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2468 		return;
2469 	}
2470 
2471 	gate_ilock(gate);
2472 	state = ordered_load_gate(gate);
2473 	holder = GATE_STATE_TO_THREAD(state);
2474 	waiters = gate_has_waiter_bit(state);
2475 
2476 	if (holder != thread) {
2477 		gate_not_owned_panic(gate, holder, true);
2478 	}
2479 
2480 	if (waiters) {
2481 		gate_open_turnstile(gate);
2482 	}
2483 
2484 	state = GATE_ILOCK;
2485 	ordered_store_gate(gate, state);
2486 
2487 	gate_iunlock(gate);
2488 }
2489 
2490 static kern_return_t
2491 gate_handoff_turnstile(gate_t *gate,
2492     int flags,
2493     thread_t *thread_woken_up,
2494     bool *waiters)
2495 {
2496 	struct turnstile *ts = NULL;
2497 	kern_return_t ret = KERN_FAILURE;
2498 	thread_t hp_thread;
2499 
2500 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2501 	/*
2502 	 * Wake up the highest priority thread waiting on the gate
2503 	 */
2504 	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
2505 
2506 	if (hp_thread != NULL) {
2507 		/*
2508 		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
2509 		 */
2510 		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2511 		*thread_woken_up = hp_thread;
2512 		*waiters = turnstile_has_waiters(ts);
2513 		/*
2514 		 * Note: hp_thread is the new holder and the new inheritor.
2515 		 * In case there are no more waiters, it doesn't need to be the inheritor
2516 		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
2517 		 * handoff can go through the fast path.
2518 		 * We could set the inheritor to NULL here, or the new holder itself can set it
2519 		 * on its way back from the sleep. In the latter case there are more chances that
2520 		 * new waiters will come by, avoiding the operation altogether.
2521 		 */
2522 		ret = KERN_SUCCESS;
2523 	} else {
2524 		/*
2525 		 * Waiters may have been woken up by an interrupt and still not
2526 		 * have updated gate->waiters, so we could not find them on the waitq.
2527 		 * Update the inheritor to NULL here, so that the current thread can return to userspace
2528 		 * independently of when the interrupted waiters finish the wait.
2529 		 */
2530 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2531 			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2532 			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2533 		}
2534 		// there are no waiters.
2535 		ret = KERN_NOT_WAITING;
2536 	}
2537 
2538 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2539 
2540 	/*
2541 	 * We can do the cleanup while holding the interlock.
2542 	 * It is ok because:
2543 	 * 1. current_thread is the previous inheritor and it is running
2544 	 * 2. new inheritor is NULL or it is a just-woken-up thread that will race to acquire the lock
2545 	 *    of the gate before trying to sleep.
2546 	 * => No chain of turnstiles needs to be updated.
2547 	 */
2548 	turnstile_cleanup();
2549 
2550 	return ret;
2551 }
2552 
2553 static kern_return_t
2554 gate_handoff(gate_t *gate,
2555     int flags)
2556 {
2557 	kern_return_t ret;
2558 	thread_t new_holder = NULL;
2559 	uintptr_t state;
2560 	thread_t holder;
2561 	bool waiters;
2562 	thread_t thread = current_thread();
2563 
2564 	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2565 	gate_verify(gate);
2566 
2567 	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2568 		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2569 			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
2570 			return KERN_NOT_WAITING;
2571 		}
2572 	}
2573 
2574 	gate_ilock(gate);
2575 	state = ordered_load_gate(gate);
2576 	holder = GATE_STATE_TO_THREAD(state);
2577 	waiters = gate_has_waiter_bit(state);
2578 
2579 	if (holder != current_thread()) {
2580 		gate_not_owned_panic(gate, holder, false);
2581 	}
2582 
2583 	if (waiters) {
2584 		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2585 		if (ret == KERN_SUCCESS) {
2586 			state = GATE_THREAD_TO_STATE(new_holder);
2587 			if (waiters) {
2588 				state |= GATE_WAITERS;
2589 			}
2590 		} else {
2591 			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2592 				state = 0;
2593 			}
2594 		}
2595 	} else {
2596 		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2597 			state = 0;
2598 		}
2599 		ret = KERN_NOT_WAITING;
2600 	}
2601 	state |= GATE_ILOCK;
2602 	ordered_store_gate(gate, state);
2603 
2604 	gate_iunlock(gate);
2605 
2606 	if (new_holder) {
2607 		thread_deallocate(new_holder);
2608 	}
2609 	return ret;
2610 }
2611 
2612 static void_func_void
2613 gate_steal_turnstile(gate_t *gate,
2614     thread_t new_inheritor)
2615 {
2616 	struct turnstile *ts = NULL;
2617 
2618 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2619 
2620 	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2621 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2622 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2623 
2624 	/*
2625 	 * turnstile_cleanup might need to update the chain of the old holder.
2626 	 * This operation should happen without the turnstile interlock held.
2627 	 */
2628 	return turnstile_cleanup;
2629 }
2630 
2631 __abortlike
2632 static void
2633 gate_not_closed_panic(gate_t *gate, bool wait)
2634 {
2635 	if (wait) {
2636 		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
2637 	} else {
2638 		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
2639 	}
2640 }
2641 
2642 static void
2643 gate_steal(gate_t *gate)
2644 {
2645 	uintptr_t state;
2646 	thread_t holder;
2647 	thread_t thread = current_thread();
2648 	bool waiters;
2649 
2650 	void_func_void func_after_interlock_unlock;
2651 
2652 	gate_verify(gate);
2653 
2654 	gate_ilock(gate);
2655 	state = ordered_load_gate(gate);
2656 	holder = GATE_STATE_TO_THREAD(state);
2657 	waiters = gate_has_waiter_bit(state);
2658 
2659 	if (holder == NULL) {
2660 		gate_not_closed_panic(gate, false);
2661 	}
2662 
2663 	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
2664 	if (waiters) {
2665 		state |= GATE_WAITERS;
2666 		ordered_store_gate(gate, state);
2667 		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
2668 		gate_iunlock(gate);
2669 
2670 		func_after_interlock_unlock();
2671 	} else {
2672 		ordered_store_gate(gate, state);
2673 		gate_iunlock(gate);
2674 	}
2675 }
2676 
2677 static void_func_void
2678 gate_wait_turnstile(gate_t *gate,
2679     wait_interrupt_t interruptible,
2680     uint64_t deadline,
2681     thread_t holder,
2682     wait_result_t* wait,
2683     bool* waiters)
2684 {
2685 	struct turnstile *ts;
2686 	uintptr_t state;
2687 
2688 	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2689 
2690 	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2691 	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);
2692 
2693 	gate_iunlock(gate);
2694 
2695 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2696 
2697 	*wait = thread_block(THREAD_CONTINUE_NULL);
2698 
2699 	gate_ilock(gate);
2700 
2701 	*waiters = turnstile_has_waiters(ts);
2702 
2703 	if (!*waiters) {
2704 		/*
2705 		 * We want to enable the fast path as soon as we see that there are no more waiters.
2706 		 * On the fast path the holder will not do any turnstile operations.
2707 		 * Set the inheritor as NULL here.
2708 		 *
2709 		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
2710 		 * already been set to NULL.
2711 		 */
2712 		state = ordered_load_gate(gate);
2713 		holder = GATE_STATE_TO_THREAD(state);
2714 		if (holder &&
2715 		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
2716 		    holder == current_thread())) {     // thread was woken up and it is the new holder
2717 			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2718 			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2719 		}
2720 	}
2721 
2722 	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2723 
2724 	/*
2725 	 * turnstile_cleanup might need to update the chain of the old holder.
2726 	 * This operation should happen without the turnstile primitive interlock held.
2727 	 */
2728 	return turnstile_cleanup;
2729 }
2730 
2731 static void
2732 gate_free_internal(gate_t *gate)
2733 {
2734 	zfree(KT_GATE, gate);
2735 }
2736 
2737 __abortlike
2738 static void
2739 gate_too_many_refs_panic(gate_t *gate)
2740 {
2741 	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
2742 }
2743 
2744 static gate_wait_result_t
2745 gate_wait(gate_t* gate,
2746     wait_interrupt_t interruptible,
2747     uint64_t deadline,
2748     void (^primitive_unlock)(void),
2749     void (^primitive_lock)(void))
2750 {
2751 	gate_wait_result_t ret;
2752 	void_func_void func_after_interlock_unlock;
2753 	wait_result_t wait_result;
2754 	uintptr_t state;
2755 	thread_t holder;
2756 	bool waiters;
2757 
2758 	gate_verify(gate);
2759 
2760 	gate_ilock(gate);
2761 	state = ordered_load_gate(gate);
2762 	holder = GATE_STATE_TO_THREAD(state);
2763 
2764 	if (holder == NULL) {
2765 		gate_not_closed_panic(gate, true);
2766 	}
2767 
2768 	/*
2769 	 * Get a ref on the gate so it will not
2770 	 * be freed while we are coming back from the sleep.
2771 	 */
2772 	if (gate->gt_refs == UINT16_MAX) {
2773 		gate_too_many_refs_panic(gate);
2774 	}
2775 	gate->gt_refs++;
2776 	state |= GATE_WAITERS;
2777 	ordered_store_gate(gate, state);
2778 
2779 	/*
2780 	 * Release the primitive lock before any
2781 	 * turnstile operation. Turnstile
2782 	 * does not support a blocking primitive as
2783 	 * interlock.
2784 	 *
2785 	 * In this way, concurrent threads will be
2786 	 * able to acquire the primitive lock
2787 	 * but will still wait for me through the
2788 	 * gate interlock.
2789 	 */
2790 	primitive_unlock();
2791 
2792 	func_after_interlock_unlock = gate_wait_turnstile(gate,
2793 	    interruptible,
2794 	    deadline,
2795 	    holder,
2796 	    &wait_result,
2797 	    &waiters);
2798 
2799 	state = ordered_load_gate(gate);
2800 	holder = GATE_STATE_TO_THREAD(state);
2801 
2802 	switch (wait_result) {
2803 	case THREAD_INTERRUPTED:
2804 	case THREAD_TIMED_OUT:
2805 		assert(holder != current_thread());
2806 
2807 		if (waiters) {
2808 			state |= GATE_WAITERS;
2809 		} else {
2810 			state &= ~GATE_WAITERS;
2811 		}
2812 		ordered_store_gate(gate, state);
2813 
2814 		if (wait_result == THREAD_INTERRUPTED) {
2815 			ret = GATE_INTERRUPTED;
2816 		} else {
2817 			ret = GATE_TIMED_OUT;
2818 		}
2819 		break;
2820 	default:
2821 		/*
2822 		 * Note it is possible that even if the gate was handed off to
2823 		 * me, someone called gate_steal() before I woke up.
2824 		 *
2825 		 * It is likewise possible that the gate was opened, but someone
2826 		 * closed it while I was waking up.
2827 		 *
2828 		 * In both cases we return GATE_OPENED: the gate was opened to me
2829 		 * at one point, and it is the caller's responsibility to check again
2830 		 * whether the gate is open.
2831 		 */
2832 		if (holder == current_thread()) {
2833 			ret = GATE_HANDOFF;
2834 		} else {
2835 			ret = GATE_OPENED;
2836 		}
2837 		break;
2838 	}
2839 
2840 	assert(gate->gt_refs > 0);
2841 	uint32_t ref = --gate->gt_refs;
2842 	bool to_free = gate->gt_alloc;
2843 	gate_iunlock(gate);
2844 
2845 	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
2846 		if (to_free == true) {
2847 			assert(!waiters);
2848 			if (ref == 0) {
2849 				gate_free_internal(gate);
2850 			}
2851 			ret = GATE_OPENED;
2852 		} else {
2853 			gate_verify_destroy_panic(gate);
2854 		}
2855 	}
2856 
2857 	/*
2858 	 * turnstile func that needs to be executed without
2859 	 * holding the primitive interlock
2860 	 */
2861 	func_after_interlock_unlock();
2862 
2863 	primitive_lock();
2864 
2865 	return ret;
2866 }
2867 
2868 static void
2869 gate_assert(gate_t *gate, int flags)
2870 {
2871 	uintptr_t state;
2872 	thread_t holder;
2873 
2874 	gate_verify(gate);
2875 
2876 	gate_ilock(gate);
2877 	state = ordered_load_gate(gate);
2878 	holder = GATE_STATE_TO_THREAD(state);
2879 
2880 	switch (flags) {
2881 	case GATE_ASSERT_CLOSED:
2882 		assert(holder != NULL);
2883 		break;
2884 	case GATE_ASSERT_OPEN:
2885 		assert(holder == NULL);
2886 		break;
2887 	case GATE_ASSERT_HELD:
2888 		assert(holder == current_thread());
2889 		break;
2890 	default:
2891 		panic("invalid %s flag %d", __func__, flags);
2892 	}
2893 
2894 	gate_iunlock(gate);
2895 }
2896 
2897 enum {
2898 	GT_INIT_DEFAULT = 0,
2899 	GT_INIT_ALLOC
2900 };
2901 
2902 static void
2903 gate_init(gate_t *gate, uint type)
2904 {
2905 	bzero(gate, sizeof(gate_t));
2906 
2907 	gate->gt_data = 0;
2908 	gate->gt_turnstile = NULL;
2909 	gate->gt_refs = 1;
2910 	switch (type) {
2911 	case GT_INIT_ALLOC:
2912 		gate->gt_alloc = 1;
2913 		break;
2914 	default:
2915 		gate->gt_alloc = 0;
2916 		break;
2917 	}
2918 	gate->gt_type = GATE_TYPE;
2919 	gate->gt_flags_pad = 0;
2920 }
2921 
2922 static gate_t*
2923 gate_alloc_init(void)
2924 {
2925 	gate_t *gate;
2926 	gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2927 	gate_init(gate, GT_INIT_ALLOC);
2928 	return gate;
2929 }
2930 
2931 __abortlike
2932 static void
2933 gate_destroy_owned_panic(gate_t *gate, thread_t holder)
2934 {
2935 	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
2936 }
2937 
2938 __abortlike
2939 static void
2940 gate_destroy_waiter_panic(gate_t *gate)
2941 {
2942 	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
2943 }
2944 
2945 static uint16_t
2946 gate_destroy_internal(gate_t *gate)
2947 {
2948 	uintptr_t state;
2949 	thread_t holder;
2950 	uint16_t ref;
2951 
2952 	gate_ilock(gate);
2953 	state = ordered_load_gate(gate);
2954 	holder = GATE_STATE_TO_THREAD(state);
2955 
2956 	/*
2957 	 * The gate must be open
2958 	 * and all the threads must
2959 	 * have been woken up by this time
2960 	 */
2961 	if (holder != NULL) {
2962 		gate_destroy_owned_panic(gate, holder);
2963 	}
2964 	if (gate_has_waiter_bit(state)) {
2965 		gate_destroy_waiter_panic(gate);
2966 	}
2967 
2968 	assert(gate->gt_refs > 0);
2969 
2970 	ref = --gate->gt_refs;
2971 
2972 	/*
2973 	 * Mark the gate as destroyed.
2974 	 * The interlock bit still needs
2975 	 * to be available to let the
2976 	 * last woken-up threads clear
2977 	 * the wait.
2978 	 */
2979 	state = GATE_DESTROYED;
2980 	state |= GATE_ILOCK;
2981 	ordered_store_gate(gate, state);
2982 	gate_iunlock(gate);
2983 	return ref;
2984 }
2985 
2986 __abortlike
2987 static void
2988 gate_destroy_panic(gate_t *gate)
2989 {
2990 	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
2991 }
2992 
2993 static void
2994 gate_destroy(gate_t *gate)
2995 {
2996 	gate_verify(gate);
2997 	if (gate->gt_alloc == 1) {
2998 		gate_destroy_panic(gate);
2999 	}
3000 	gate_destroy_internal(gate);
3001 }
3002 
3003 __abortlike
3004 static void
3005 gate_free_panic(gate_t *gate)
3006 {
3007 	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
3008 }
3009 
3010 static void
3011 gate_free(gate_t *gate)
3012 {
3013 	uint16_t ref;
3014 
3015 	gate_verify(gate);
3016 
3017 	if (gate->gt_alloc == 0) {
3018 		gate_free_panic(gate);
3019 	}
3020 
3021 	ref = gate_destroy_internal(gate);
3022 	/*
3023 	 * Some of the threads waiting on the gate
3024 	 * might still need to run after being woken up.
3025 	 * They will access the gate to clean up the
3026 	 * state, so we cannot free it.
3027 	 * The last waiter will free the gate in this case.
3028 	 */
3029 	if (ref == 0) {
3030 		gate_free_internal(gate);
3031 	}
3032 }
3033 
3034 /*
3035  * Name: lck_rw_gate_init
3036  *
3037  * Description: initializes a variable declared with decl_lck_rw_gate_data.
3038  *
3039  * Args:
3040  *   Arg1: lck_rw_t lock used to protect the gate.
3041  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3042  */
3043 void
3044 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
3045 {
3046 	(void) lock;
3047 	gate_init(gate, GT_INIT_DEFAULT);
3048 }
3049 
3050 /*
3051  * Name: lck_rw_gate_alloc_init
3052  *
3053  * Description: allocates and initializes a gate_t.
3054  *
3055  * Args:
3056  *   Arg1: lck_rw_t lock used to protect the gate.
3057  *
3058  * Returns:
3059  *         gate_t allocated.
3060  */
3061 gate_t*
3062 lck_rw_gate_alloc_init(lck_rw_t *lock)
3063 {
3064 	(void) lock;
3065 	return gate_alloc_init();
3066 }
3067 
3068 /*
3069  * Name: lck_rw_gate_destroy
3070  *
3071  * Description: destroys a variable previously initialized
3072  *              with lck_rw_gate_init().
3073  *
3074  * Args:
3075  *   Arg1: lck_rw_t lock used to protect the gate.
3076  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3077  */
3078 void
3079 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
3080 {
3081 	(void) lock;
3082 	gate_destroy(gate);
3083 }
3084 
3085 /*
3086  * Name: lck_rw_gate_free
3087  *
3088  * Description: destroys and tries to free a gate previously allocated
3089  *              with lck_rw_gate_alloc_init().
3090  *              The gate free might be delegated to the last thread returning
3091  *              from the gate_wait().
3092  *
3093  * Args:
3094  *   Arg1: lck_rw_t lock used to protect the gate.
3095  *   Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
3096  */
3097 void
3098 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
3099 {
3100 	(void) lock;
3101 	gate_free(gate);
3102 }
3103 
3104 /*
3105  * Name: lck_rw_gate_try_close
3106  *
3107  * Description: Tries to close the gate.
3108  *              In case of success the current thread will be set as
3109  *              the holder of the gate.
3110  *
3111  * Args:
3112  *   Arg1: lck_rw_t lock used to protect the gate.
3113  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3114  *
3115  * Conditions: Lock must be held. Returns with the lock held.
3116  *
3117  * Returns:
3118  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3119  *          of the gate.
3120  *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
3121  *          to wake up possible waiters on the gate before returning to userspace.
3122  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3123  *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
3124  *
3125  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3126  *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3127  *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
3128  *          be done without dropping the lock that is protecting the gate in between.
3129  */
3130 int
3131 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
3132 {
3133 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3134 
3135 	return gate_try_close(gate);
3136 }
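
/*
 * Sketch (hypothetical caller, not part of this file): the probe-then-wait
 * pattern described above. `lock`, `gate` and the critical section are
 * placeholders; the rw lock is not dropped between try_close() and wait().
 */
#if 0 /* example only */
	lck_rw_lock_exclusive(lock);
	if (lck_rw_gate_try_close(lock, gate) == KERN_SUCCESS) {
		/* we are the holder: do the work, then let waiters through */
		lck_rw_gate_open(lock, gate);
	} else {
		switch (lck_rw_gate_wait(lock, gate, LCK_SLEEP_DEFAULT,
		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER)) {
		case GATE_HANDOFF:
			/* now the holder: must open or hand off before user space */
			break;
		case GATE_OPENED:
		default:
			/* gate was opened by the holder: re-check the protected state */
			break;
		}
	}
	lck_rw_done(lock);
#endif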
3137 
3138 /*
3139  * Name: lck_rw_gate_close
3140  *
3141  * Description: Closes the gate. The current thread will be set as
3142  *              the holder of the gate. Will panic if the gate is already closed.
3143  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
3144  *              to wake up possible waiters on the gate before returning to userspace.
3145  *
3146  * Args:
3147  *   Arg1: lck_rw_t lock used to protect the gate.
3148  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3149  *
3150  * Conditions: Lock must be held. Returns with the lock held.
3151  *             The gate must be open.
3152  *
3153  */
3154 void
3155 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
3156 {
3157 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3158 
3159 	return gate_close(gate);
3160 }
3161 
3162 /*
3163  * Name: lck_rw_gate_open
3164  *
3165  * Description: Opens the gate and wakes up possible waiters.
3166  *
3167  * Args:
3168  *   Arg1: lck_rw_t lock used to protect the gate.
3169  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3170  *
3171  * Conditions: Lock must be held. Returns with the lock held.
3172  *             The current thread must be the holder of the gate.
3173  *
3174  */
3175 void
3176 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
3177 {
3178 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3179 
3180 	gate_open(gate);
3181 }
3182 
3183 /*
3184  * Name: lck_rw_gate_handoff
3185  *
3186  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3187  *              priority will be selected as the new holder of the gate, and woken up,
3188  *              with the gate remaining in the closed state throughout.
3189  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3190  *              will be returned.
3191  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3192  *              case no waiters were found.
3193  *
3194  *
3195  * Args:
3196  *   Arg1: lck_rw_t lock used to protect the gate.
3197  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3198  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3199  *
3200  * Conditions: Lock must be held. Returns with the lock held.
3201  *             The current thread must be the holder of the gate.
3202  *
3203  * Returns:
3204  *          KERN_SUCCESS in case one of the waiters became the new holder.
3205  *          KERN_NOT_WAITING in case there were no waiters.
3206  *
3207  */
3208 kern_return_t
3209 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3210 {
3211 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3212 
3213 	return gate_handoff(gate, flags);
3214 }
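
/*
 * Sketch (hypothetical holder, not part of this file): handing the gate to
 * the highest priority waiter, or opening it when nobody is waiting. `lock`
 * and `gate` are placeholders.
 */
#if 0 /* example only */
	if (lck_rw_gate_handoff(lock, gate,
	    GATE_HANDOFF_OPEN_IF_NO_WAITERS) == KERN_NOT_WAITING) {
		/* no waiters: the gate is now open and we are no longer the holder */
	}
#endif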
3215 
3216 /*
3217  * Name: lck_rw_gate_steal
3218  *
3219  * Description: Steals ownership of the gate: it sets the current thread as the
3220  *              new holder of the gate.
3221  *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
3222  *              to wake up possible waiters on the gate before returning to userspace.
3223  *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
3224  *              anymore.
3225  *
3226  *
3227  * Args:
3228  *   Arg1: lck_rw_t lock used to protect the gate.
3229  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3230  *
3231  * Conditions: Lock must be held. Returns with the lock held.
3232  *             The gate must be closed and the current thread must not already be the holder.
3233  *
3234  */
3235 void
3236 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
3237 {
3238 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3239 
3240 	gate_steal(gate);
3241 }
3242 
3243 /*
3244  * Name: lck_rw_gate_wait
3245  *
3246  * Description: Waits for the current thread to become the holder of the gate or for the
3247  *              gate to become open. An interruptible mode and deadline can be specified
3248  *              to return earlier from the wait.
3249  *
3250  * Args:
3251  *   Arg1: lck_rw_t lock used to protect the gate.
3252  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3253  *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
3254  *   Arg4: interruptible flag for wait.
3255  *   Arg5: deadline for wait.
3256  *
3257  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3258  *             Lock will be dropped while waiting.
3259  *             The gate must be closed.
3260  *
3261  * Returns: Reason why the thread was woken up.
3262  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3263  *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
3264  *                         to wake up possible waiters on the gate before returning to userspace.
3265  *          GATE_OPENED - the gate was opened by the holder.
3266  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3267  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3268  */
3269 gate_wait_result_t
3270 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3271 {
3272 	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
3273 
3274 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3275 
3276 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3277 		return gate_wait(gate,
3278 		           interruptible,
3279 		           deadline,
3280 		           ^{lck_rw_type = lck_rw_done(lock);},
3281 		           ^{;});
3282 	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
3283 		return gate_wait(gate,
3284 		           interruptible,
3285 		           deadline,
3286 		           ^{lck_rw_type = lck_rw_done(lock);},
3287 		           ^{lck_rw_lock(lock, lck_rw_type);});
3288 	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
3289 		return gate_wait(gate,
3290 		           interruptible,
3291 		           deadline,
3292 		           ^{lck_rw_type = lck_rw_done(lock);},
3293 		           ^{lck_rw_lock_exclusive(lock);});
3294 	} else {
3295 		return gate_wait(gate,
3296 		           interruptible,
3297 		           deadline,
3298 		           ^{lck_rw_type = lck_rw_done(lock);},
3299 		           ^{lck_rw_lock_shared(lock);});
3300 	}
3301 }
3302 
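/*
 * Editor's illustrative sketch (not part of the original file): a waiter
 * blocking on an rw-protected gate and reacquiring the lock shared on wakeup.
 * Names are hypothetical; TIMEOUT_WAIT_FOREVER is assumed to be the usual
 * "no deadline" value.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_rw_wait(lck_rw_t *lock, gate_t *gate)
{
	gate_wait_result_t res;

	lck_rw_lock_shared(lock);

	/* The gate is assumed to be closed by another thread at this point. */
	res = lck_rw_gate_wait(lock, gate, LCK_SLEEP_SHARED,
	    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);

	if (res == GATE_HANDOFF) {
		/* Ownership was handed to us; open it before returning to userspace. */
		lck_rw_gate_open(lock, gate);
	} else {
		/*
		 * GATE_OPENED: the holder opened the gate. GATE_TIMED_OUT and
		 * GATE_INTERRUPTED cannot occur with THREAD_UNINT and no deadline.
		 */
	}

	lck_rw_done(lock);
}
#endif
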
3303 /*
3304  * Name: lck_rw_gate_assert
3305  *
3306  * Description: Asserts that the gate is in the specified state.
3307  *
3308  * Args:
3309  *   Arg1: lck_rw_t lock used to protect the gate.
3310  *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
3311  *   Arg3: flags to specify the assert type.
3312  *         GATE_ASSERT_CLOSED - the gate is currently closed
3313  *         GATE_ASSERT_OPEN - the gate is currently open
3314  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3315  */
3316 void
3317 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
3318 {
3319 	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
3320 
3321 	gate_assert(gate, flags);
3322 	return;
3323 }
3324 
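/*
 * Editor's illustrative sketch (not part of the original file): asserting
 * gate state while holding the protecting rw lock. Names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_rw_assert_held(lck_rw_t *lock, gate_t *gate)
{
	lck_rw_lock_exclusive(lock);
	/* With assertions enabled, panics unless the current thread holds the gate. */
	lck_rw_gate_assert(lock, gate, GATE_ASSERT_HELD);
	lck_rw_unlock_exclusive(lock);
}
#endif
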
3325 /*
3326  * Name: lck_mtx_gate_init
3327  *
3328  * Description: initializes a variable declared with decl_lck_mtx_gate_data.
3329  *
3330  * Args:
3331  *   Arg1: lck_mtx_t lock used to protect the gate.
3332  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3333  */
3334 void
3335 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
3336 {
3337 	(void) lock;
3338 	gate_init(gate, GT_INIT_DEFAULT);
3339 }
3340 
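/*
 * Editor's illustrative sketch (not part of the original file): declaring
 * gate storage with decl_lck_mtx_gate_data() and initializing it next to its
 * protecting mutex. The (class, name) argument order of the macro is an
 * assumption based on the decl_lck_mtx_data() convention; all example_ names
 * are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static lck_mtx_t example_mtx;			/* assumed set up via lck_mtx_init() or LCK_MTX_DECLARE */
decl_lck_mtx_gate_data(static, example_gate);	/* gate storage */

static void
example_gate_setup(void)
{
	lck_mtx_gate_init(&example_mtx, &example_gate);
}
#endif
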
3341 /*
3342  * Name: lck_mtx_gate_alloc_init
3343  *
3344  * Description: allocates and initializes a gate_t.
3345  *
3346  * Args:
3347  *   Arg1: lck_mtx_t lock used to protect the gate.
3348  *
3349  * Returns:
3350  *         gate_t allocated.
3351  */
3352 gate_t*
3353 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3354 {
3355 	(void) lock;
3356 	return gate_alloc_init();
3357 }
3358 
3359 /*
3360  * Name: lck_mtx_gate_destroy
3361  *
3362  * Description: destroys a variable previously initialized
3363  *              with lck_mtx_gate_init().
3364  *
3365  * Args:
3366  *   Arg1: lck_mtx_t lock used to protect the gate.
3367  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3368  */
3369 void
3370 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3371 {
3372 	(void) lock;
3373 	gate_destroy(gate);
3374 }
3375 
3376 /*
3377  * Name: lck_mtx_gate_free
3378  *
3379  * Description: destroys and tries to free a gate previously allocated
3380  *	        with lck_mtx_gate_alloc_init().
3381  *              The gate free might be delegated to the last thread returning
3382  *              from the gate_wait().
3383  *
3384  * Args:
3385  *   Arg1: lck_mtx_t lock used to protect the gate.
3386  *   Arg2: pointer to the gate obtained with lck_mtx_gate_alloc_init().
3387  */
3388 void
3389 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3390 {
3391 	(void) lock;
3392 	gate_free(gate);
3393 }
3394 
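/*
 * Editor's illustrative sketch (not part of the original file): lifecycle of
 * a dynamically allocated gate. Names are hypothetical and the guarded work
 * is elided.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_gate_dynamic_lifecycle(lck_mtx_t *lock)
{
	gate_t *gate;

	gate = lck_mtx_gate_alloc_init(lock);	/* allocate and initialize */

	/* ... use lck_mtx_gate_close()/wait()/open() with `lock` held ... */

	/*
	 * Destroy and try to free. As documented above, the actual free may
	 * be delegated to the last thread returning from gate_wait().
	 */
	lck_mtx_gate_free(lock, gate);
}
#endif
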
3395 /*
3396  * Name: lck_mtx_gate_try_close
3397  *
3398  * Description: Tries to close the gate.
3399  *              In case of success the current thread will be set as
3400  *              the holder of the gate.
3401  *
3402  * Args:
3403  *   Arg1: lck_mtx_t lock used to protect the gate.
3404  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3405  *
3406  * Conditions: Lock must be held. Returns with the lock held.
3407  *
3408  * Returns:
3409  *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3410  *          of the gate.
3411  *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3412  *          to wake up possible waiters on the gate before returning to userspace.
3413  *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3414  *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3415  *
3416  *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3417  *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3418  *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3419  *          be done without dropping the lock that is protecting the gate in between.
3420  */
3421 int
3422 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3423 {
3424 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3425 
3426 	return gate_try_close(gate);
3427 }
3428 
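/*
 * Editor's illustrative sketch (not part of the original file): the
 * "probe, otherwise wait" pattern described above, performed without dropping
 * the protecting mutex between the two calls. All names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_probe_or_wait(lck_mtx_t *lock, gate_t *gate)
{
	lck_mtx_lock(lock);

	if (lck_mtx_gate_try_close(lock, gate) == KERN_SUCCESS) {
		/*
		 * We closed the gate and are now the holder: do the guarded
		 * work (elided), then open or hand off before returning to
		 * userspace.
		 */
		lck_mtx_gate_open(lock, gate);
	} else {
		/* Someone else holds the gate: wait for a handoff or an open. */
		gate_wait_result_t res;

		res = lck_mtx_gate_wait(lock, gate, LCK_SLEEP_DEFAULT,
		    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
		if (res == GATE_HANDOFF) {
			/* Ownership was transferred to us; release it. */
			lck_mtx_gate_open(lock, gate);
		}
	}

	lck_mtx_unlock(lock);
}
#endif
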
3429 /*
3430  * Name: lck_mtx_gate_close
3431  *
3432  * Description: Closes the gate. The current thread will be set as
3433  *              the holder of the gate. Will panic if the gate is already closed.
3434  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3435  *              to wake up possible waiters on the gate before returning to userspace.
3436  *
3437  * Args:
3438  *   Arg1: lck_mtx_t lock used to protect the gate.
3439  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3440  *
3441  * Conditions: Lock must be held. Returns with the lock held.
3442  *             The gate must be open.
3443  *
3444  */
3445 void
3446 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3447 {
3448 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3449 
3450 	gate_close(gate);
3451 }
3452 
3453 /*
3454  * Name: lck_mtx_gate_open
3455  *
3456  * Description: Opens the gate and wakes up possible waiters.
3457  *
3458  * Args:
3459  *   Arg1: lck_mtx_t lock used to protect the gate.
3460  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3461  *
3462  * Conditions: Lock must be held. Returns with the lock held.
3463  *             The current thread must be the holder of the gate.
3464  *
3465  */
3466 void
3467 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3468 {
3469 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3470 
3471 	gate_open(gate);
3472 }
3473 
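/*
 * Editor's illustrative sketch (not part of the original file): using
 * close/open to serialize a section during which the protecting mutex itself
 * must be dropped. All names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_close_do_work_open(lck_mtx_t *lock, gate_t *gate, void (*long_operation)(void))
{
	lck_mtx_lock(lock);

	/* The gate is assumed open here; closing makes us the holder. */
	lck_mtx_gate_close(lock, gate);

	/* The gate stays closed even while the mutex is dropped. */
	lck_mtx_unlock(lock);
	long_operation();
	lck_mtx_lock(lock);

	/* Wake up anybody who queued on the gate in the meantime. */
	lck_mtx_gate_open(lock, gate);

	lck_mtx_unlock(lock);
}
#endif
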
3474 /*
3475  * Name: lck_mtx_gate_handoff
3476  *
3477  * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3478  *              priority will be selected as the new holder of the gate, and woken up,
3479  *              with the gate remaining in the closed state throughout.
3480  *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3481  *              will be returned.
3482  *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3483  *              case no waiters were found.
3484  *
3485  *
3486  * Args:
3487  *   Arg1: lck_mtx_t lock used to protect the gate.
3488  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3489  *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3490  *
3491  * Conditions: Lock must be held. Returns with the lock held.
3492  *             The current thread must be the holder of the gate.
3493  *
3494  * Returns:
3495  *          KERN_SUCCESS in case one of the waiters became the new holder.
3496  *          KERN_NOT_WAITING in case there were no waiters.
3497  *
3498  */
3499 kern_return_t
3500 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3501 {
3502 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3503 
3504 	return gate_handoff(gate, flags);
3505 }
3506 
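/*
 * Editor's illustrative sketch (not part of the original file): handing the
 * gate to the highest-priority waiter and falling back to a plain open when
 * nobody is waiting. Passing GATE_HANDOFF_OPEN_IF_NO_WAITERS instead would
 * collapse the two steps into one call. All names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_mtx_handoff(lck_mtx_t *lock, gate_t *gate)
{
	lck_mtx_lock(lock);

	/* Current thread is assumed to be the gate holder here. */
	if (lck_mtx_gate_handoff(lock, gate, GATE_HANDOFF_DEFAULT) == KERN_NOT_WAITING) {
		/* No waiters: the gate stayed closed and is still ours; open it. */
		lck_mtx_gate_open(lock, gate);
	}

	lck_mtx_unlock(lock);
}
#endif
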
3507 /*
3508  * Name: lck_mtx_gate_steal
3509  *
3510  * Description: Steals the ownership of the gate. It sets the current thread as the
3511  *              new holder of the gate.
3512  *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3513  *              to wake up possible waiters on the gate before returning to userspace.
3514  *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3515  *              anymore.
3516  *
3517  *
3518  * Args:
3519  *   Arg1: lck_mtx_t lock used to protect the gate.
3520  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3521  *
3522  * Conditions: Lock must be held. Returns with the lock held.
3523  *             The gate must be closed and the current thread must not already be the holder.
3524  *
3525  */
3526 void
3527 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3528 {
3529 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3530 
3531 	gate_steal(gate);
3532 }
3533 
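/*
 * Editor's illustrative sketch (not part of the original file): taking over a
 * gate whose recorded holder is known, by some external invariant, to be
 * unable to open it anymore. The scenario and names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_mtx_steal(lck_mtx_t *lock, gate_t *gate)
{
	lck_mtx_lock(lock);

	/* Precondition: the gate is closed and the current thread is not the holder. */
	lck_mtx_gate_steal(lock, gate);

	/*
	 * We are now the holder; the previous holder must not call
	 * lck_mtx_gate_open()/lck_mtx_gate_handoff() anymore, so we open it.
	 */
	lck_mtx_gate_open(lock, gate);

	lck_mtx_unlock(lock);
}
#endif
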
3534 /*
3535  * Name: lck_mtx_gate_wait
3536  *
3537  * Description: Waits for the current thread to become the holder of the gate or for the
3538  *              gate to become open. An interruptible mode and deadline can be specified
3539  *              to return earlier from the wait.
3540  *
3541  * Args:
3542  *   Arg1: lck_mtx_t lock used to protect the gate.
3543  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3544  *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3545  *   Arg4: interruptible flag for wait.
3546  *   Arg5: deadline
3547  *
3548  * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3549  *             Lock will be dropped while waiting.
3550  *             The gate must be closed.
3551  *
3552  * Returns: Reason why the thread was woken up.
3553  *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3554  *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3555  *                         to wake up possible waiters on the gate before returning to userspace.
3556  *          GATE_OPENED - the gate was opened by the holder.
3557  *          GATE_TIMED_OUT - the thread was woken up by a timeout.
3558  *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
3559  */
3560 gate_wait_result_t
3561 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3562 {
3563 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3564 
3565 	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3566 		return gate_wait(gate,
3567 		           interruptible,
3568 		           deadline,
3569 		           ^{lck_mtx_unlock(lock);},
3570 		           ^{;});
3571 	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3572 		return gate_wait(gate,
3573 		           interruptible,
3574 		           deadline,
3575 		           ^{lck_mtx_unlock(lock);},
3576 		           ^{lck_mtx_lock_spin(lock);});
3577 	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3578 		return gate_wait(gate,
3579 		           interruptible,
3580 		           deadline,
3581 		           ^{lck_mtx_unlock(lock);},
3582 		           ^{lck_mtx_lock_spin_always(lock);});
3583 	} else {
3584 		return gate_wait(gate,
3585 		           interruptible,
3586 		           deadline,
3587 		           ^{lck_mtx_unlock(lock);},
3588 		           ^{lck_mtx_lock(lock);});
3589 	}
3590 }
3591 
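/*
 * Editor's illustrative sketch (not part of the original file): an
 * interruptible wait with a deadline, handling every wakeup reason. The
 * clock_interval_to_deadline() call and all example_ names are illustrative
 * assumptions, not part of this file.
 */
#if 0	/* usage sketch only; not compiled */
static void
example_mtx_timed_wait(lck_mtx_t *lock, gate_t *gate)
{
	uint64_t deadline;
	gate_wait_result_t res;

	/* Wait at most ~100ms. */
	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);

	lck_mtx_lock(lock);

	/* The gate is assumed to be closed by another thread at this point. */
	res = lck_mtx_gate_wait(lock, gate, LCK_SLEEP_DEFAULT,
	    THREAD_INTERRUPTIBLE, deadline);

	switch (res) {
	case GATE_HANDOFF:
		/* Ownership handed to us; open before returning to userspace. */
		lck_mtx_gate_open(lock, gate);
		break;
	case GATE_OPENED:
		break;			/* the holder opened the gate */
	case GATE_TIMED_OUT:
		break;			/* the deadline expired */
	case GATE_INTERRUPTED:
		break;			/* the sleep was interrupted */
	}

	lck_mtx_unlock(lock);
}
#endif
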
3592 /*
3593  * Name: lck_mtx_gate_assert
3594  *
3595  * Description: Asserts that the gate is in the specified state.
3596  *
3597  * Args:
3598  *   Arg1: lck_mtx_t lock used to protect the gate.
3599  *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3600  *   Arg3: flags to specify the assert type.
3601  *         GATE_ASSERT_CLOSED - the gate is currently closed
3602  *         GATE_ASSERT_OPEN - the gate is currently open
3603  *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3604  */
3605 void
3606 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3607 {
3608 	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3609 
3610 	gate_assert(gate, flags);
3611 }
3612 
3613 #pragma mark - LCK_*_DECLARE support
3614 
3615 __startup_func
3616 void
3617 lck_grp_attr_startup_init(struct lck_grp_attr_startup_spec *sp)
3618 {
3619 	lck_grp_attr_t *attr = sp->grp_attr;
3620 	lck_grp_attr_setdefault(attr);
3621 	attr->grp_attr_val |= sp->grp_attr_set_flags;
3622 	attr->grp_attr_val &= ~sp->grp_attr_clear_flags;
3623 }
3624 
3625 __startup_func
3626 void
3627 lck_grp_startup_init(struct lck_grp_startup_spec *sp)
3628 {
3629 	lck_grp_init(sp->grp, sp->grp_name, sp->grp_attr);
3630 }
3631 
3632 __startup_func
3633 void
3634 lck_attr_startup_init(struct lck_attr_startup_spec *sp)
3635 {
3636 	lck_attr_t *attr = sp->lck_attr;
3637 	lck_attr_setdefault(attr);
3638 	attr->lck_attr_val |= sp->lck_attr_set_flags;
3639 	attr->lck_attr_val &= ~sp->lck_attr_clear_flags;
3640 }
3641 
3642 __startup_func
3643 void
3644 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3645 {
3646 	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3647 }
3648 
3649 __startup_func
3650 void
3651 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3652 {
3653 	if (sp->lck_ext) {
3654 		lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
3655 	} else {
3656 		lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3657 	}
3658 }
3659 
3660 __startup_func
3661 void
3662 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3663 {
3664 	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3665 }
3666 
3667 __startup_func
3668 void
3669 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3670 {
3671 	simple_lock_init(sp->lck, sp->lck_init_arg);
3672 }
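
/*
 * Editor's illustrative sketch (not part of the original file): the kind of
 * static declarations the startup hooks above exist to support. The
 * LCK_GRP_DECLARE / LCK_MTX_DECLARE / LCK_RW_DECLARE macro spellings are an
 * assumption based on the LCK_*_DECLARE family named in the pragma mark;
 * all other names are hypothetical.
 */
#if 0	/* usage sketch only; not compiled */
LCK_GRP_DECLARE(example_grp, "com.example.subsystem");
LCK_MTX_DECLARE(example_mtx, &example_grp);
LCK_RW_DECLARE(example_rw, &example_grp);

/*
 * Each declaration registers a startup spec; the matching *_startup_init()
 * routine above then runs during boot and performs the equivalent of
 * lck_grp_init()/lck_mtx_init()/lck_rw_init() before the lock is first used.
 */
#endif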
3673