1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63
64 #include <kern/lock_stat.h>
65 #include <kern/locks.h>
66 #include <kern/misc_protos.h>
67 #include <kern/zalloc.h>
68 #include <kern/thread.h>
69 #include <kern/processor.h>
70 #include <kern/sched_prim.h>
71 #include <kern/debug.h>
72 #include <libkern/section_keywords.h>
73 #include <machine/atomic.h>
74 #include <machine/machine_cpu.h>
75 #include <string.h>
76 #include <vm/pmap.h>
77
78 #include <sys/kdebug.h>
79
80 #define LCK_MTX_SLEEP_CODE 0
81 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
82 #define LCK_MTX_LCK_WAIT_CODE 2
83 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
84
85 #if MACH_LDEBUG
86 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
87 #else
88 #define ALIGN_TEST(p, t) do{}while(0)
89 #endif
90
91 #define NOINLINE __attribute__((noinline))
92
93 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
94 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
95
96 KALLOC_TYPE_DEFINE(KT_GATE, gate_t, KT_PRIV_ACCT);
97
98 struct lck_spinlock_to_info PERCPU_DATA(lck_spinlock_to_info);
99 volatile lck_spinlock_to_info_t lck_spinlock_timeout_in_progress;
100
101 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
102
103 #if DEBUG
104 TUNABLE(uint32_t, LcksOpts, "lcks", enaLkDeb);
105 #else
106 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
107 #endif
108
109 #if CONFIG_DTRACE
110 #if defined (__x86_64__)
111 uint32_t _Atomic dtrace_spin_threshold = 500; // 500ns
112 #define lock_enable_preemption enable_preemption
113 #elif defined(__arm__) || defined(__arm64__)
114 MACHINE_TIMEOUT32(dtrace_spin_threshold, "dtrace-spin-threshold",
115 0xC /* 12 ticks == 500ns with 24MHz OSC */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
116 #endif
117 #endif
118
119 uintptr_t
unslide_for_kdebug(void * object)120 unslide_for_kdebug(void* object)
121 {
122 if (__improbable(kdebug_enable)) {
123 return VM_KERNEL_UNSLIDE_OR_PERM(object);
124 } else {
125 return 0;
126 }
127 }
128
/*
 * Out-of-line panic path for __lck_require_preemption_disabled().
 * __abortlike keeps this cold code out of the inlined fast path.
 */
static __abortlike void
__lck_require_preemption_disabled_panic(void *lock)
{
	panic("Attempt to take no-preempt lock %p in preemptible context", lock);
}
134
135 static inline void
__lck_require_preemption_disabled(void * lock,thread_t self __unused)136 __lck_require_preemption_disabled(void *lock, thread_t self __unused)
137 {
138 if (__improbable(!lock_preemption_disabled_for_thread(self))) {
139 __lck_require_preemption_disabled_panic(lock);
140 }
141 }
142
143 /*
144 * Routine: hw_lock_init
145 *
146 * Initialize a hardware lock.
147 */
148 MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)149 hw_lock_init(hw_lock_t lock)
150 {
151 ordered_store_hw(lock, 0);
152 }
153
/*
 * One acquisition attempt on a contended hw_lock.
 *
 * Returns true if `newval` was installed (lock acquired, acquire
 * ordering), false if the lock was observed held.  On LL/SC hardware
 * the failure path calls wait_for_event() while the exclusive monitor
 * is armed, so the spinning caller is woken when the holder's release
 * store clears the monitor.
 */
__result_use_check
static inline bool
hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
{
#if OS_ATOMIC_USE_LLSC
	uintptr_t oldval;
	os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
		if (oldval != 0) {
			wait_for_event(); // clears the monitor so we don't need give_up()
			return false;
		}
	});
	return true;
#else // !OS_ATOMIC_USE_LLSC
#if OS_ATOMIC_HAS_LLSC
	uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
	if (oldval != 0) {
		wait_for_event(); // clears the monitor so we don't need give_up()
		return false;
	}
#elif LOCK_PRETEST
	/* Cheap read first to avoid a doomed cmpxchg on a visibly-held lock. */
	if (ordered_load_hw(lock) != 0) {
		return false;
	}
#endif
	return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
#endif // !OS_ATOMIC_USE_LLSC
}
182
183 /*
184 * Input and output timeouts are expressed in absolute_time for arm and TSC for Intel
185 */
186 __attribute__((always_inline))
187 uint64_t
188 #if INTERRUPT_MASKED_DEBUG
hw_lock_compute_timeout(uint64_t in_timeout,uint64_t default_timeout,__unused bool in_ppl,__unused bool interruptible)189 hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout, __unused bool in_ppl, __unused bool interruptible)
190 #else
191 hw_lock_compute_timeout(uint64_t in_timeout, uint64_t default_timeout)
192 #endif /* INTERRUPT_MASKED_DEBUG */
193 {
194 uint64_t timeout = in_timeout;
195 if (timeout == 0) {
196 timeout = default_timeout;
197 #if INTERRUPT_MASKED_DEBUG
198 #ifndef KASAN
199 if (timeout > 0 && !in_ppl) {
200 if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC && !interruptible) {
201 uint64_t int_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
202 if (int_timeout < timeout) {
203 timeout = int_timeout;
204 }
205 }
206 }
207 #endif /* !KASAN */
208 #endif /* INTERRUPT_MASKED_DEBUG */
209 }
210
211 return timeout;
212 }
213
/*
 * Remember the owner observed when a contended spin began, for the
 * timeout panic report (DEBUG/DEVELOPMENT kernels only; a no-op on
 * RELEASE).  The low 3 bits are lock-implementation flags and are
 * stripped off.
 */
__attribute__((always_inline))
void
lck_spinlock_timeout_set_orig_owner(uintptr_t owner)
{
#if DEBUG || DEVELOPMENT
	PERCPU_GET(lck_spinlock_to_info)->owner_thread_orig = owner & ~0x7ul;
#else
	(void)owner;
#endif
}
224
/*
 * Record diagnostic state when a spinlock timeout fires.
 *
 * Fills in this cpu's lck_spinlock_to_info record with the lock address
 * and (flag-stripped) owner thread, publishes it through
 * lck_spinlock_timeout_in_progress, then scans the cpu data array to
 * find the cpu the owner is active on.  On x86, NMIs the owner's cpu
 * so its backtrace is captured in the panic report.
 *
 * Returns the filled-in record for use in the caller's panic string.
 * Writes to globals, so callers must not invoke this from the PPL
 * (see hw_lock_timeout_panic).
 */
lck_spinlock_to_info_t
lck_spinlock_timeout_hit(void *lck, uintptr_t owner)
{
	lck_spinlock_to_info_t lsti = PERCPU_GET(lck_spinlock_to_info);

	/* strip possible bits used by the lock implementations */
	owner &= ~0x7ul;

	lsti->lock = lck;
	lsti->owner_thread_cur = owner;
	lsti->owner_cpu = ~0u;
	os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);

	if (owner == 0) {
		/* if the owner isn't known, just bail */
		goto out;
	}

	for (uint32_t i = 0; i <= ml_early_cpu_max_number(); i++) {
		cpu_data_t *data = cpu_datap(i);
		if (data && (uintptr_t)data->cpu_active_thread == owner) {
			lsti->owner_cpu = i;
			/* republish now that owner_cpu is known */
			os_atomic_store(&lck_spinlock_timeout_in_progress, lsti, release);
#if __x86_64__
			if ((uint32_t)cpu_number() != i) {
				/* Cause NMI and panic on the owner's cpu */
				NMIPI_panic(cpu_to_cpumask(i), SPINLOCK_TIMEOUT);
			}
#endif
			break;
		}
	}

out:
	return lsti;
}
261
/*
 * One attempt to acquire bit `bit` of `*target` as a spin-lock bit.
 *
 * Returns true when the bit was clear and is now set (acquire
 * ordering), false when it was already set.  On failure with `wait`
 * set, LL/SC hardware parks in wait_for_event() with the exclusive
 * monitor armed; without LL/SC it just pauses.  With `wait` clear the
 * monitor is released so try-lock callers can bail out immediately.
 */
__result_use_check
static inline bool
hw_lock_trylock_bit(uint32_t *target, unsigned int bit, bool wait)
{
	uint32_t mask = 1u << bit;

#if OS_ATOMIC_USE_LLSC || !OS_ATOMIC_HAS_LLSC
	uint32_t oldval, newval;
	os_atomic_rmw_loop(target, oldval, newval, acquire, {
		newval = oldval | mask;
		if (__improbable(oldval & mask)) {
#if OS_ATOMIC_HAS_LLSC
			if (wait) {
				wait_for_event(); // clears the monitor so we don't need give_up()
			} else {
				os_atomic_clear_exclusive();
			}
#else
			if (wait) {
				cpu_pause();
			}
#endif
			return false;
		}
	});
	return true;
#else
	/* Load-exclusive first so a failed attempt can wfe on the monitor. */
	uint32_t oldval = os_atomic_load_exclusive(target, relaxed);
	if (__improbable(oldval & mask)) {
		if (wait) {
			wait_for_event(); // clears the monitor so we don't need give_up()
		} else {
			os_atomic_clear_exclusive();
		}
		return false;
	}
	/* Won the race only if the bit was still clear when the OR landed. */
	return (os_atomic_or_orig(target, mask, acquire) & mask) == 0;
#endif // !OS_ATOMIC_USE_LLSC && OS_ATOMIC_HAS_LLSC
}
301
/*
 * Default timeout handler for full-word hw_locks: panic with owner
 * diagnostics, unless spinlock_timeout_panic was disabled via boot-arg,
 * in which case the caller keeps spinning.
 *
 * The PPL path uses a reduced panic string because PPL code cannot
 * write to the globals that lck_spinlock_timeout_hit() fills in.
 */
static hw_lock_timeout_status_t
hw_lock_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
{
#pragma unused(interrupt_time)

	hw_lock_t lock = _lock;
	uintptr_t owner = lock->lock_data & ~0x7ul; /* strip impl flag bits */
	lck_spinlock_to_info_t lsti;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	if (pmap_in_ppl()) {
		/*
		 * This code is used by the PPL and can't write to globals.
		 */
		panic("Spinlock[%p] timeout after %llu ticks; "
		    "current owner: %p, "
		    "start time: %llu, now: %llu, timeout: %llu",
		    lock, now - start, (void *)owner,
		    start, now, timeout);
	}

	// Capture the actual time spent blocked, which may be higher than the timeout
	// if a misbehaving interrupt stole this thread's CPU time.
	lsti = lck_spinlock_timeout_hit(lock, owner);
	panic("Spinlock[%p] timeout after %llu ticks; "
	    "current owner: %p (on cpu %d), "
#if DEBUG || DEVELOPMENT
	    "initial owner: %p, "
#endif /* DEBUG || DEVELOPMENT */
#if INTERRUPT_MASKED_DEBUG
	    "interrupt time: %llu, "
#endif /* INTERRUPT_MASKED_DEBUG */
	    "start time: %llu, now: %llu, timeout: %llu",
	    lock, now - start,
	    (void *)lsti->owner_thread_cur, lsti->owner_cpu,
#if DEBUG || DEVELOPMENT
	    (void *)lsti->owner_thread_orig,
#endif /* DEBUG || DEVELOPMENT */
#if INTERRUPT_MASKED_DEBUG
	    interrupt_time,
#endif /* INTERRUPT_MASKED_DEBUG */
	    start, now, timeout);
}
349
/*
 * Default timeout handler for bit locks.  Unlike full hw_locks, a bit
 * lock word does not encode an owner thread, so only the raw lock
 * state can be reported.  Honors the spinlock_timeout_panic override.
 */
static hw_lock_timeout_status_t
hw_lock_bit_timeout_panic(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
{
#pragma unused(interrupt_time)

	hw_lock_t lock = _lock;
	uintptr_t state = lock->lock_data;

	if (!spinlock_timeout_panic) {
		/* keep spinning rather than panicking */
		return HW_LOCK_TIMEOUT_CONTINUE;
	}

	panic("Spinlock[%p] timeout after %llu ticks; "
	    "current state: %p, "
#if INTERRUPT_MASKED_DEBUG
	    "interrupt time: %llu, "
#endif /* INTERRUPT_MASKED_DEBUG */
	    "start time: %llu, now: %llu, timeout: %llu",
	    lock, now - start, (void*) state,
#if INTERRUPT_MASKED_DEBUG
	    interrupt_time,
#endif /* INTERRUPT_MASKED_DEBUG */
	    start, now, timeout);
}
375
376 /*
377 * Routine: hw_lock_lock_contended
378 *
379 * Spin until lock is acquired or timeout expires.
380 * timeout is in mach_absolute_time ticks. Called with
381 * preemption disabled.
382 */
383 static hw_lock_status_t NOINLINE
hw_lock_lock_contended(hw_lock_t lock,thread_t thread,uintptr_t data,uint64_t timeout,hw_lock_timeout_handler_t handler LCK_GRP_ARG (lck_grp_t * grp))384 hw_lock_lock_contended(hw_lock_t lock, thread_t thread, uintptr_t data, uint64_t timeout,
385 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
386 {
387 #pragma unused(thread)
388
389 uint64_t end = 0, start = 0, interrupts = 0;
390 uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
391 bool has_timeout = true, in_ppl = pmap_in_ppl();
392 #if INTERRUPT_MASKED_DEBUG
393 /* Note we can't check if we are interruptible if in ppl */
394 bool interruptible = !in_ppl && ml_get_interrupts_enabled();
395 uint64_t start_interrupts = 0;
396 #endif /* INTERRUPT_MASKED_DEBUG */
397
398 #if CONFIG_DTRACE || LOCK_STATS
399 uint64_t begin = 0;
400 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
401
402 if (__improbable(stat_enabled)) {
403 begin = mach_absolute_time();
404 }
405 #endif /* CONFIG_DTRACE || LOCK_STATS */
406
407 if (!in_ppl) {
408 /*
409 * This code is used by the PPL and can't write to globals.
410 */
411 lck_spinlock_timeout_set_orig_owner(lock->lock_data);
412 }
413
414 #if INTERRUPT_MASKED_DEBUG
415 timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
416 #else
417 timeout = hw_lock_compute_timeout(timeout, default_timeout);
418 #endif /* INTERRUPT_MASKED_DEBUG */
419 if (timeout == 0) {
420 has_timeout = false;
421 }
422
423 for (;;) {
424 for (uint32_t i = 0; i < LOCK_SNOOP_SPINS; i++) {
425 cpu_pause();
426 if (hw_lock_trylock_contended(lock, data)) {
427 #if CONFIG_DTRACE || LOCK_STATS
428 if (__improbable(stat_enabled)) {
429 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
430 mach_absolute_time() - begin);
431 }
432 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
433 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
434 #endif /* CONFIG_DTRACE || LOCK_STATS */
435 return HW_LOCK_ACQUIRED;
436 }
437 }
438 if (has_timeout) {
439 uint64_t now = ml_get_timebase();
440 if (end == 0) {
441 #if INTERRUPT_MASKED_DEBUG
442 if (interruptible) {
443 start_interrupts = thread->machine.int_time_mt;
444 }
445 #endif /* INTERRUPT_MASKED_DEBUG */
446 start = now;
447 end = now + timeout;
448 } else if (now < end) {
449 /* keep spinning */
450 } else {
451 #if INTERRUPT_MASKED_DEBUG
452 if (interruptible) {
453 interrupts = thread->machine.int_time_mt - start_interrupts;
454 }
455 #endif /* INTERRUPT_MASKED_DEBUG */
456 if (handler(lock, timeout, start, now, interrupts)) {
457 /* push the deadline */
458 end += timeout;
459 } else {
460 #if CONFIG_DTRACE || LOCK_STATS
461 if (__improbable(stat_enabled)) {
462 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
463 mach_absolute_time() - begin);
464 }
465 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
466 #endif /* CONFIG_DTRACE || LOCK_STATS */
467 return HW_LOCK_CONTENDED;
468 }
469 }
470 }
471 }
472 }
473
/*
 * Spin until the 32-bit word at `address` no longer equals `current`,
 * returning the new value.  Panics if the lock_panic_timeout window
 * expires first (a computed timeout of 0 disables the deadline).
 * On LL/SC hardware waits via wfe on the exclusive monitor instead of
 * pure busy-polling.
 */
uint32_t
hw_wait_while_equals32(uint32_t *address, uint32_t current)
{
	uint32_t v;
	uint64_t end = 0, timeout = 0;
	uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool has_timeout = true;
#if INTERRUPT_MASKED_DEBUG
	thread_t thread = current_thread();
	bool in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t interrupts = 0, start_interrupts = 0;

	timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(0, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		has_timeout = false;
	}

	for (;;) {
		/* Batch loads between (relatively expensive) timebase reads. */
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if OS_ATOMIC_HAS_LLSC
			v = os_atomic_load_exclusive(address, relaxed);
			if (__probable(v != current)) {
				os_atomic_clear_exclusive();
				return v;
			}
			wait_for_event();
#else
			v = os_atomic_load(address, relaxed);
			if (__probable(v != current)) {
				return v;
			}
#endif // OS_ATOMIC_HAS_LLSC
		}
		if (has_timeout) {
			if (end == 0) {
				/* Anchor the deadline lazily on first check. */
				end = ml_get_timebase() + timeout;
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
			} else if (ml_get_timebase() >= end) {
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
					panic("Wait while equals timeout @ *%p == 0x%x, "
					    "interrupt_time %llu", address, v, interrupts);
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				panic("Wait while equals timeout @ *%p == 0x%x",
				    address, v);
			}
		}
	}
}
535
/*
 * 64-bit variant of hw_wait_while_equals32(): spin until the word at
 * `address` differs from `current` and return the new value, panicking
 * if the lock_panic_timeout window expires (computed timeout of 0
 * disables the deadline).
 */
uint64_t
hw_wait_while_equals64(uint64_t *address, uint64_t current)
{
	uint64_t v;
	uint64_t end = 0, timeout = 0;
	uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool has_timeout = true;

#if INTERRUPT_MASKED_DEBUG
	thread_t thread = current_thread();
	bool in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t interrupts = 0, start_interrupts = 0;

	timeout = hw_lock_compute_timeout(0, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(0, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		has_timeout = false;
	}

	for (;;) {
		/* Batch loads between (relatively expensive) timebase reads. */
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			cpu_pause();
#if OS_ATOMIC_HAS_LLSC
			v = os_atomic_load_exclusive(address, relaxed);
			if (__probable(v != current)) {
				os_atomic_clear_exclusive();
				return v;
			}
			wait_for_event();
#else
			v = os_atomic_load(address, relaxed);
			if (__probable(v != current)) {
				return v;
			}
#endif // OS_ATOMIC_HAS_LLSC
		}
		if (has_timeout) {
			if (end == 0) {
				/* Anchor the deadline lazily on first check. */
				end = ml_get_timebase() + timeout;
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
			} else if (ml_get_timebase() >= end) {
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					interrupts = thread->machine.int_time_mt - start_interrupts;
					panic("Wait while equals timeout @ *%p == 0x%llx, "
					    "interrupt_time %llu", address, v, interrupts);
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				panic("Wait while equals timeout @ *%p == 0x%llx",
				    address, v);
			}
		}
	}
}
598
/*
 * Common acquisition path for hw_lock_lock()/hw_lock_to() variants:
 * try the uncontended fast path once, then fall into the contended
 * spin loop.  The lock word records the owning thread plus
 * PLATFORM_LCK_ILOCK.  Preemption must already be disabled.
 */
__result_use_check
static inline hw_lock_status_t
hw_lock_to_internal(hw_lock_t lock, thread_t thread, uint64_t timeout,
    hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
{
	uintptr_t state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;

	if (__probable(hw_lock_trylock_contended(lock, state))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return hw_lock_lock_contended(lock, thread, state, timeout, handler LCK_GRP_ARG(grp));
}
613
614 /*
615 * Routine: hw_lock_lock
616 *
617 * Acquire lock, spinning until it becomes available,
618 * return with preemption disabled.
619 */
620 void
621 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
622 {
623 thread_t thread = current_thread();
624 lock_disable_preemption_for_thread(thread);
625 (void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
626 }
627
628 /*
629 * Routine: hw_lock_lock_nopreempt
630 *
631 * Acquire lock, spinning until it becomes available.
632 */
633 void
634 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
635 {
636 thread_t thread = current_thread();
637 __lck_require_preemption_disabled(lock, thread);
638 (void)hw_lock_to_internal(lock, thread, 0, hw_lock_timeout_panic LCK_GRP_ARG(grp));
639 }
640
641 /*
642 * Routine: hw_lock_to
643 *
644 * Acquire lock, spinning until it becomes available or timeout.
645 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
646 * preemption disabled.
647 */
648 unsigned
649 int
650 (hw_lock_to)(hw_lock_t lock, uint64_t timeout, hw_lock_timeout_handler_t handler
651 LCK_GRP_ARG(lck_grp_t *grp))
652 {
653 thread_t thread = current_thread();
654 lock_disable_preemption_for_thread(thread);
655 return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
656 }
657
658 /*
659 * Routine: hw_lock_to_nopreempt
660 *
661 * Acquire lock, spinning until it becomes available or timeout.
662 * Timeout is in mach_absolute_time ticks, called and return with
663 * preemption disabled.
664 */
665 unsigned
666 int
667 (hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout,
668 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
669 {
670 thread_t thread = current_thread();
671 __lck_require_preemption_disabled(lock, thread);
672 return (unsigned)hw_lock_to_internal(lock, thread, timeout, handler LCK_GRP_ARG(grp));
673 }
674
/*
 * Single try-lock attempt: install owner|PLATFORM_LCK_ILOCK if the
 * lock word is 0.  Returns nonzero on success.  Does not touch the
 * preemption level; callers manage it.
 */
__result_use_check
static inline unsigned int
hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
{
	int success = 0;

#if LOCK_PRETEST
	/* Cheap read first to avoid a doomed cmpxchg on a held lock. */
	if (__improbable(ordered_load_hw(lock) != 0)) {
		return 0;
	}
#endif	// LOCK_PRETEST

	success = os_atomic_cmpxchg(&lock->lock_data, 0,
	    LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);

	if (success) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
	}
	return success;
}
695
696 /*
697 * Routine: hw_lock_try
698 *
699 * returns with preemption disabled on success.
700 */
701 unsigned
702 int
703 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
704 {
705 thread_t thread = current_thread();
706 lock_disable_preemption_for_thread(thread);
707 unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
708 if (!success) {
709 lock_enable_preemption();
710 }
711 return success;
712 }
713
/*
 * Try-lock variant for callers that already hold preemption disabled
 * (verified, panics otherwise).  Returns nonzero on success; the
 * preemption level is never modified.
 */
unsigned
int
(hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
{
	thread_t thread = current_thread();
	__lck_require_preemption_disabled(lock, thread);
	return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
}
722
723 /*
724 * Routine: hw_lock_unlock
725 *
726 * Unconditionally release lock, release preemption level.
727 */
728 static inline void
hw_lock_unlock_internal(hw_lock_t lock)729 hw_lock_unlock_internal(hw_lock_t lock)
730 {
731 os_atomic_store(&lock->lock_data, 0, release);
732 #if __arm__ || __arm64__
733 // ARM tests are only for open-source exclusion
734 set_event();
735 #endif // __arm__ || __arm64__
736 #if CONFIG_DTRACE
737 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
738 #endif /* CONFIG_DTRACE */
739 }
740
/* Release the lock and drop the preemption disable taken at acquire. */
void
(hw_lock_unlock)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
	lock_enable_preemption();
}
747
/* Release the lock without touching the preemption level. */
void
(hw_lock_unlock_nopreempt)(hw_lock_t lock)
{
	hw_lock_unlock_internal(lock);
}
753
754 /*
755 * Routine hw_lock_held, doesn't change preemption state.
756 * N.B. Racy, of course.
757 */
758 unsigned int
hw_lock_held(hw_lock_t lock)759 hw_lock_held(hw_lock_t lock)
760 {
761 return ordered_load_hw(lock) != 0;
762 }
763
/*
 * Contended spin loop for bit locks, mirroring hw_lock_lock_contended():
 * retry in batches of LOCK_SNOOP_SPINS, then consult the timeout
 * handler, which either extends the deadline or gives up
 * (HW_LOCK_CONTENDED).  A computed timeout of 0 spins forever.
 *
 * NOTE(review): `rc` holds the bool result of hw_lock_trylock_bit();
 * the comparisons assume HW_LOCK_ACQUIRED/HW_LOCK_CONTENDED convert
 * as true/false — confirm against the hw_lock_status_t definition.
 */
static hw_lock_status_t NOINLINE
hw_lock_bit_to_contended(
	hw_lock_bit_t *lock,
	uint32_t bit,
	uint64_t timeout,
	hw_lock_timeout_handler_t handler
	LCK_GRP_ARG(lck_grp_t *grp))
{
	uint64_t end = 0, start = 0, interrupts = 0;
	uint64_t default_timeout = os_atomic_load(&lock_panic_timeout, relaxed);
	bool has_timeout = true;
	hw_lock_status_t rc;
#if INTERRUPT_MASKED_DEBUG
	thread_t thread = current_thread();
	bool in_ppl = pmap_in_ppl();
	/* Note we can't check if we are interruptible if in ppl */
	bool interruptible = !in_ppl && ml_get_interrupts_enabled();
	uint64_t start_interrupts = 0;
#endif /* INTERRUPT_MASKED_DEBUG */

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* LOCK_STATS || CONFIG_DTRACE */

#if INTERRUPT_MASKED_DEBUG
	timeout = hw_lock_compute_timeout(timeout, default_timeout, in_ppl, interruptible);
#else
	timeout = hw_lock_compute_timeout(timeout, default_timeout);
#endif /* INTERRUPT_MASKED_DEBUG */
	if (timeout == 0) {
		has_timeout = false;
	}

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
			// Always load-exclusive before wfe
			// This grabs the monitor and wakes up on a release event
			rc = hw_lock_trylock_bit(lock, bit, true);
			if (rc == HW_LOCK_ACQUIRED) {
				lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
				goto end;
			}
		}
		if (has_timeout) {
			uint64_t now = ml_get_timebase();
			if (end == 0) {
				/* First deadline check: anchor the spin window lazily. */
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					start_interrupts = thread->machine.int_time_mt;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				start = now;
				end = now + timeout;
			} else if (now < end) {
				/* keep spinning */
			} else {
#if INTERRUPT_MASKED_DEBUG
				if (interruptible) {
					/* Interrupt time accrued while we spun. */
					interrupts = thread->machine.int_time_mt - start_interrupts;
				}
#endif /* INTERRUPT_MASKED_DEBUG */
				if (handler(lock, timeout, start, now, interrupts)) {
					/* push the deadline */
					end += timeout;
				} else {
					/* Handler gave up; return the contended status. */
					assert(rc == HW_LOCK_CONTENDED);
					break;
				}
			}
		}
	}

end:
	/* Reached on both the acquired and the gave-up paths. */
#if CONFIG_DTRACE || LOCK_STATS
	if (__improbable(stat_enabled)) {
		lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp),
		    mach_absolute_time() - begin);
	}
	lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
	return rc;
}
851
/*
 * Common bit-lock acquisition path: one fast-path attempt, then the
 * contended spin loop.  Returns HW_LOCK_ACQUIRED (nonzero) on success.
 * Preemption must already be disabled.
 */
__result_use_check
static inline unsigned int
hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint64_t timeout,
    hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
{
	if (__probable(hw_lock_trylock_bit(lock, bit, true))) {
		lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
		return HW_LOCK_ACQUIRED;
	}

	return (unsigned)hw_lock_bit_to_contended(lock, bit, timeout, handler
	           LCK_GRP_ARG(grp));
}
865
866 /*
867 * Routine: hw_lock_bit_to
868 *
869 * Acquire bit lock, spinning until it becomes available or timeout.
870 * Timeout is in mach_absolute_time ticks (TSC in Intel), return with
871 * preemption disabled.
872 */
873 unsigned
874 int
875 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint64_t timeout,
876 hw_lock_timeout_handler_t handler LCK_GRP_ARG(lck_grp_t *grp))
877 {
878 _disable_preemption();
879 return hw_lock_bit_to_internal(lock, bit, timeout, handler LCK_GRP_ARG(grp));
880 }
881
882 /*
883 * Routine: hw_lock_bit
884 *
885 * Acquire bit lock, spinning until it becomes available,
886 * return with preemption disabled.
887 */
888 void
889 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
890 {
891 _disable_preemption();
892 (void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
893 }
894
895 /*
896 * Routine: hw_lock_bit_nopreempt
897 *
898 * Acquire bit lock, spinning until it becomes available.
899 */
900 void
901 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
902 {
903 __lck_require_preemption_disabled(lock, current_thread());
904 (void)hw_lock_bit_to_internal(lock, bit, 0, hw_lock_bit_timeout_panic LCK_GRP_ARG(grp));
905 }
906
907
908 unsigned
909 int
910 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
911 {
912 boolean_t success = false;
913
914 _disable_preemption();
915 success = hw_lock_trylock_bit(lock, bit, false);
916 if (!success) {
917 lock_enable_preemption();
918 }
919
920 if (success) {
921 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
922 }
923
924 return success;
925 }
926
/*
 * Clear the lock bit with release ordering, publishing the critical
 * section, and wake waiters parked in wait_for_event().
 *
 * NOTE(review): set_event() is compiled only for __arm__ here, while
 * hw_lock_unlock_internal() uses __arm__ || __arm64__ — confirm arm64
 * bit-lock waiters are woken by the store clearing the monitor.
 */
static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
	os_atomic_andnot(lock, 1u << bit, release);
#if __arm__
	set_event();
#endif
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}
938
939 /*
940 * Routine: hw_unlock_bit
941 *
942 * Release spin-lock. The second parameter is the bit number to test and set.
943 * Decrement the preemption level.
944 */
945 void
hw_unlock_bit(hw_lock_bit_t * lock,unsigned int bit)946 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
947 {
948 hw_unlock_bit_internal(lock, bit);
949 lock_enable_preemption();
950 }
951
/*
 * Release a bit lock without touching the preemption level; the caller
 * must still hold preemption disabled (verified, panics otherwise).
 */
void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	__lck_require_preemption_disabled(lock, current_thread());
	hw_unlock_bit_internal(lock, bit);
}
958
959 /*
960 * Routine: lck_spin_sleep
961 */
962 wait_result_t
lck_spin_sleep_grp(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,lck_grp_t * grp)963 lck_spin_sleep_grp(
964 lck_spin_t *lck,
965 lck_sleep_action_t lck_sleep_action,
966 event_t event,
967 wait_interrupt_t interruptible,
968 lck_grp_t *grp)
969 {
970 wait_result_t res;
971
972 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
973 panic("Invalid lock sleep action %x", lck_sleep_action);
974 }
975
976 res = assert_wait(event, interruptible);
977 if (res == THREAD_WAITING) {
978 lck_spin_unlock(lck);
979 res = thread_block(THREAD_CONTINUE_NULL);
980 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
981 lck_spin_lock_grp(lck, grp);
982 }
983 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
984 lck_spin_unlock(lck);
985 }
986
987 return res;
988 }
989
/*
 * Ungrouped wrapper around lck_spin_sleep_grp(): identical semantics
 * with no lock-group attribution for the reacquire.
 */
wait_result_t
lck_spin_sleep(
	lck_spin_t              *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
}
999
1000 /*
1001 * Routine: lck_spin_sleep_deadline
1002 */
1003 wait_result_t
lck_spin_sleep_deadline(lck_spin_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1004 lck_spin_sleep_deadline(
1005 lck_spin_t *lck,
1006 lck_sleep_action_t lck_sleep_action,
1007 event_t event,
1008 wait_interrupt_t interruptible,
1009 uint64_t deadline)
1010 {
1011 wait_result_t res;
1012
1013 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1014 panic("Invalid lock sleep action %x", lck_sleep_action);
1015 }
1016
1017 res = assert_wait_deadline(event, interruptible, deadline);
1018 if (res == THREAD_WAITING) {
1019 lck_spin_unlock(lck);
1020 res = thread_block(THREAD_CONTINUE_NULL);
1021 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1022 lck_spin_lock(lck);
1023 }
1024 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1025 lck_spin_unlock(lck);
1026 }
1027
1028 return res;
1029 }
1030
1031 /*
1032 * Routine: lck_mtx_sleep
1033 */
1034 wait_result_t
lck_mtx_sleep(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible)1035 lck_mtx_sleep(
1036 lck_mtx_t *lck,
1037 lck_sleep_action_t lck_sleep_action,
1038 event_t event,
1039 wait_interrupt_t interruptible)
1040 {
1041 wait_result_t res;
1042 thread_pri_floor_t token;
1043
1044 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1045 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1046
1047 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1048 panic("Invalid lock sleep action %x", lck_sleep_action);
1049 }
1050
1051 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1052 /*
1053 * We get a priority floor
1054 * during the time that this thread is asleep, so that when it
1055 * is re-awakened (and not yet contending on the mutex), it is
1056 * runnable at a reasonably high priority.
1057 */
1058 token = thread_priority_floor_start();
1059 }
1060
1061 res = assert_wait(event, interruptible);
1062 if (res == THREAD_WAITING) {
1063 lck_mtx_unlock(lck);
1064 res = thread_block(THREAD_CONTINUE_NULL);
1065 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1066 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1067 lck_mtx_lock_spin(lck);
1068 } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1069 lck_mtx_lock_spin_always(lck);
1070 } else {
1071 lck_mtx_lock(lck);
1072 }
1073 }
1074 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1075 lck_mtx_unlock(lck);
1076 }
1077
1078 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1079 thread_priority_floor_end(&token);
1080 }
1081
1082 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1083
1084 return res;
1085 }
1086
1087
1088 /*
1089 * Routine: lck_mtx_sleep_deadline
1090 */
1091 wait_result_t
lck_mtx_sleep_deadline(lck_mtx_t * lck,lck_sleep_action_t lck_sleep_action,event_t event,wait_interrupt_t interruptible,uint64_t deadline)1092 lck_mtx_sleep_deadline(
1093 lck_mtx_t *lck,
1094 lck_sleep_action_t lck_sleep_action,
1095 event_t event,
1096 wait_interrupt_t interruptible,
1097 uint64_t deadline)
1098 {
1099 wait_result_t res;
1100 thread_pri_floor_t token;
1101
1102 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1103 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1104
1105 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1106 panic("Invalid lock sleep action %x", lck_sleep_action);
1107 }
1108
1109 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1110 /*
1111 * See lck_mtx_sleep().
1112 */
1113 token = thread_priority_floor_start();
1114 }
1115
1116 res = assert_wait_deadline(event, interruptible, deadline);
1117 if (res == THREAD_WAITING) {
1118 lck_mtx_unlock(lck);
1119 res = thread_block(THREAD_CONTINUE_NULL);
1120 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1121 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1122 lck_mtx_lock_spin(lck);
1123 } else {
1124 lck_mtx_lock(lck);
1125 }
1126 }
1127 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1128 lck_mtx_unlock(lck);
1129 }
1130
1131 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1132 thread_priority_floor_end(&token);
1133 }
1134
1135 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1136
1137 return res;
1138 }
1139
1140 /*
1141 * Lock Boosting Invariants:
1142 *
1143 * The lock owner is always promoted to the max priority of all its waiters.
1144 * Max priority is capped at MAXPRI_PROMOTE.
1145 *
1146 * The last waiter is not given a promotion when it wakes up or acquires the lock.
1147 * When the last waiter is waking up, a new contender can always come in and
1148 * steal the lock without having to wait for the last waiter to make forward progress.
1149 */
1150
1151 /*
1152 * Routine: lck_mtx_lock_wait
1153 *
1154 * Invoked in order to wait on contention.
1155 *
1156 * Called with the interlock locked and
1157 * returns it unlocked.
1158 *
1159 * Always aggressively sets the owning thread to promoted,
1160 * even if it's the same or higher priority
1161 * This prevents it from lowering its own priority while holding a lock
1162 *
1163 * TODO: Come up with a more efficient way to handle same-priority promotions
1164 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1165 */
1166 void
lck_mtx_lock_wait(lck_mtx_t * lck,thread_t holder,struct turnstile ** ts)1167 lck_mtx_lock_wait(
1168 lck_mtx_t *lck,
1169 thread_t holder,
1170 struct turnstile **ts)
1171 {
1172 thread_t thread = current_thread();
1173 lck_mtx_t *mutex = lck;
1174 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1175
1176 #if CONFIG_DTRACE
1177 uint64_t sleep_start = 0;
1178
1179 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1180 sleep_start = mach_absolute_time();
1181 }
1182 #endif
1183
1184 #if LOCKS_INDIRECT_ALLOW
1185 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1186 mutex = &lck->lck_mtx_ptr->lck_mtx;
1187 }
1188 #endif /* LOCKS_INDIRECT_ALLOW */
1189
1190 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1191 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1192
1193 mutex->lck_mtx_waiters++;
1194
1195 if (*ts == NULL) {
1196 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1197 }
1198
1199 struct turnstile *turnstile = *ts;
1200 thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
1201 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1202
1203 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
1204
1205 lck_mtx_ilk_unlock(mutex);
1206
1207 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
1208
1209 thread_block(THREAD_CONTINUE_NULL);
1210
1211 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1212 #if CONFIG_DTRACE
1213 /*
1214 * Record the DTrace lockstat probe for blocking, block time
1215 * measured from when we were entered.
1216 */
1217 if (sleep_start) {
1218 #if LOCKS_INDIRECT_ALLOW
1219 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
1220 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1221 mach_absolute_time() - sleep_start);
1222 } else
1223 #endif /* LOCKS_INDIRECT_ALLOW */
1224 {
1225 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1226 mach_absolute_time() - sleep_start);
1227 }
1228 }
1229 #endif
1230 }
1231
1232 /*
1233 * Routine: lck_mtx_lock_acquire
1234 *
1235 * Invoked on acquiring the mutex when there is
1236 * contention.
1237 *
1238 * Returns the current number of waiters.
1239 *
1240 * Called with the interlock locked.
1241 */
1242 int
lck_mtx_lock_acquire(lck_mtx_t * lck,struct turnstile * ts)1243 lck_mtx_lock_acquire(
1244 lck_mtx_t *lck,
1245 struct turnstile *ts)
1246 {
1247 thread_t thread = current_thread();
1248 lck_mtx_t *mutex = lck;
1249
1250 #if LOCKS_INDIRECT_ALLOW
1251 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1252 mutex = &lck->lck_mtx_ptr->lck_mtx;
1253 }
1254 #endif /* LOCKS_INDIRECT_ALLOW */
1255
1256 if (mutex->lck_mtx_waiters > 0) {
1257 if (ts == NULL) {
1258 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1259 }
1260
1261 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1262 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1263 }
1264
1265 if (ts != NULL) {
1266 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1267 }
1268
1269 return mutex->lck_mtx_waiters;
1270 }
1271
1272 /*
1273 * Routine: lck_mtx_unlock_wakeup
1274 *
1275 * Invoked on unlock when there is contention.
1276 *
1277 * Called with the interlock locked.
1278 *
1279 * NOTE: callers should call turnstile_clenup after
1280 * dropping the interlock.
1281 */
1282 boolean_t
lck_mtx_unlock_wakeup(lck_mtx_t * lck,thread_t holder)1283 lck_mtx_unlock_wakeup(
1284 lck_mtx_t *lck,
1285 thread_t holder)
1286 {
1287 thread_t thread = current_thread();
1288 lck_mtx_t *mutex = lck;
1289 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1290 struct turnstile *ts;
1291 kern_return_t did_wake;
1292
1293 #if LOCKS_INDIRECT_ALLOW
1294 if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
1295 mutex = &lck->lck_mtx_ptr->lck_mtx;
1296 }
1297 #endif /* LOCKS_INDIRECT_ALLOW */
1298
1299
1300 if (thread != holder) {
1301 panic("lck_mtx_unlock_wakeup: mutex %p holder %p", mutex, holder);
1302 }
1303
1304 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1305 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1306
1307 assert(mutex->lck_mtx_waiters > 0);
1308
1309 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1310
1311 if (mutex->lck_mtx_waiters > 1) {
1312 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1313 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1314 } else {
1315 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1316 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1317 }
1318 assert(did_wake == KERN_SUCCESS);
1319
1320 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1321 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1322
1323 mutex->lck_mtx_waiters--;
1324
1325 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1326
1327 return mutex->lck_mtx_waiters > 0;
1328 }
1329
1330 /*
1331 * Routine: mutex_pause
1332 *
1333 * Called by former callers of simple_lock_pause().
1334 */
1335 #define MAX_COLLISION_COUNTS 32
1336 #define MAX_COLLISION 8
1337
1338 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1339
1340 uint32_t collision_backoffs[MAX_COLLISION] = {
1341 10, 50, 100, 200, 400, 600, 800, 1000
1342 };
1343
1344
1345 void
mutex_pause(uint32_t collisions)1346 mutex_pause(uint32_t collisions)
1347 {
1348 wait_result_t wait_result;
1349 uint32_t back_off;
1350
1351 if (collisions >= MAX_COLLISION_COUNTS) {
1352 collisions = MAX_COLLISION_COUNTS - 1;
1353 }
1354 max_collision_count[collisions]++;
1355
1356 if (collisions >= MAX_COLLISION) {
1357 collisions = MAX_COLLISION - 1;
1358 }
1359 back_off = collision_backoffs[collisions];
1360
1361 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1362 assert(wait_result == THREAD_WAITING);
1363
1364 wait_result = thread_block(THREAD_CONTINUE_NULL);
1365 assert(wait_result == THREAD_TIMED_OUT);
1366 }
1367
1368
/* Counters: how often lck_mtx_yield() found waiters (and yielded) vs. not. */
unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

/*
 * Routine:	lck_mtx_yield
 *
 * If any thread is waiting on the owned mutex, briefly drop it, pause,
 * and re-acquire so a waiter can make progress.
 */
void
lck_mtx_yield(
	lck_mtx_t       *lck)
{
	int waiters;

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

#if LOCKS_INDIRECT_ALLOW
	/* For indirect mutexes, read waiters from the real lck_mtx state. */
	if (__improbable(lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)) {
		waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
	} else
#endif /* LOCKS_INDIRECT_ALLOW */
	{
		waiters = lck->lck_mtx_waiters;
	}

	if (!waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0); /* shortest backoff (10 us table entry) */
		lck_mtx_lock(lck);
	}
}
1400
1401 /*
1402 * sleep_with_inheritor and wakeup_with_inheritor KPI
1403 *
1404 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1405 * the latest thread specified as inheritor.
1406 *
 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom
 * to direct the push. The inheritor cannot run in user space while holding a push from an event. Therefore it is the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1410 *
1411 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1412 *
 * Turnstile requires a non blocking primitive as interlock to synchronize the turnstile data structure manipulation, therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non blocking primitive is required before
1416 * invoking any turnstile operation.
1417 *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
1419 * spinlock as the turnstile interlock. Because we do not want to hold interrupt disabled while holding the bucket interlock a new turnstile kernel hash table
1420 * is instantiated for this KPI to manage the hash without interrupt disabled.
1421 * Also:
1422 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1423 * - every event will have a dedicated wait_queue.
1424 *
1425 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1426 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1427 */
1428
/*
 * Common wakeup path for the wakeup_*_with_inheritor() KPI.
 *
 * Wakes one waiter (wake_one == true) or all waiters for `event` on the
 * turnstile hashed from `event`.  For LCK_WAKE_DEFAULT the turnstile push
 * is transferred to the woken thread; otherwise the inheritor is cleared.
 * Returns KERN_SUCCESS if a thread was woken, else KERN_NOT_WAITING.
 */
kern_return_t
wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_NOT_WAITING;
	int priority;
	thread_t wokeup;

	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	if (wake_one) {
		if (action == LCK_WAKE_DEFAULT) {
			priority = WAITQ_PROMOTE_ON_WAKE;
		} else {
			assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
			priority = WAITQ_ALL_PRIORITIES;
		}

		/*
		 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
		 * if it finds a thread
		 */
		wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
		if (wokeup != NULL) {
			/*
			 * A reference on the woken thread is either handed to
			 * the caller via *thread_wokenup or dropped here.
			 */
			if (thread_wokenup != NULL) {
				*thread_wokenup = wokeup;
			} else {
				thread_deallocate_safe(wokeup);
			}
			ret = KERN_SUCCESS;
			if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
				goto complete;
			}
		} else {
			if (thread_wokenup != NULL) {
				*thread_wokenup = NULL;
			}
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			ret = KERN_NOT_WAITING;
		}
	} else {
		ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
		turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
	}

	/*
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor or is null, or is a thread that is just been woken up
	 * and have not blocked because it is racing with the same interlock used here
	 * after the wait.
	 * So there is no chain to update for the new inheritor.
	 *
	 * However unless the current thread is the old inheritor,
	 * old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We can anyway optimize this by asking turnstile to tell us
	 * if old inheritor needs an update and drop the lock
	 * just in that case.
	 */
	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	/* Re-take the bucket lock to tear down the turnstile for this event. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

complete:
	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1512
/*
 * Common sleep path for the *_sleep_with_inheritor() KPI.
 *
 * Drops the caller's primitive (via primitive_unlock), parks the current
 * thread on the turnstile hashed from `event` while pushing its priority
 * on `inheritor`, then re-acquires the primitive (via primitive_lock)
 * after waking.  Returns the wait result.
 */
static wait_result_t
sleep_with_inheritor_and_turnstile_type(event_t event,
    thread_t inheritor,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    turnstile_type_t type,
    void (^primitive_lock)(void),
    void (^primitive_unlock)(void))
{
	wait_result_t ret;
	uint32_t index;
	struct turnstile *ts = NULL;

	/*
	 * the hash bucket spinlock is used as turnstile interlock,
	 * lock it before releasing the primitive lock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	primitive_unlock();

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
	/*
	 * We need TURNSTILE_DELAYED_UPDATE because we will call
	 * waitq_assert_wait64 after.
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * Update new and old inheritor chains outside the interlock;
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (ret == THREAD_WAITING) {
		ret = thread_block(THREAD_CONTINUE_NULL);
	}

	/* Re-take the bucket lock to tear down the turnstile for this event. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	/* Re-acquire the caller's primitive before returning. */
	primitive_lock();

	return ret;
}
1568
/*
 * Redirect the push of the waiters on `event` to the new `inheritor`.
 * Returns KERN_NOT_WAITING if the event's turnstile has no waiters,
 * KERN_SUCCESS otherwise.
 */
kern_return_t
change_sleep_inheritor_and_turnstile_type(event_t event,
    thread_t inheritor,
    turnstile_type_t type)
{
	uint32_t index;
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_SUCCESS;
	/*
	 * the hash bucket spinlock is used as turnstile interlock
	 */
	turnstile_hash_bucket_lock((uintptr_t)event, &index, type);

	ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);

	if (!turnstile_has_waiters(ts)) {
		ret = KERN_NOT_WAITING;
	}

	/*
	 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
	 */
	turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	/*
	 * update the chains outside the interlock
	 */
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	/* Re-take the bucket lock to tear down the turnstile for this event. */
	turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);

	turnstile_complete((uintptr_t)event, NULL, NULL, type);

	turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);

	turnstile_cleanup();

	return ret;
}
1610
1611 typedef void (^void_block_void)(void);
1612
1613 /*
1614 * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1615 */
1616
1617 wait_result_t
lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline,turnstile_type_t type)1618 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1619 {
1620 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1621
1622 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1623 return sleep_with_inheritor_and_turnstile_type(event,
1624 inheritor,
1625 interruptible,
1626 deadline,
1627 type,
1628 ^{;},
1629 ^{lck_mtx_unlock(lock);});
1630 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1631 return sleep_with_inheritor_and_turnstile_type(event,
1632 inheritor,
1633 interruptible,
1634 deadline,
1635 type,
1636 ^{lck_mtx_lock_spin(lock);},
1637 ^{lck_mtx_unlock(lock);});
1638 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1639 return sleep_with_inheritor_and_turnstile_type(event,
1640 inheritor,
1641 interruptible,
1642 deadline,
1643 type,
1644 ^{lck_mtx_lock_spin_always(lock);},
1645 ^{lck_mtx_unlock(lock);});
1646 } else {
1647 return sleep_with_inheritor_and_turnstile_type(event,
1648 inheritor,
1649 interruptible,
1650 deadline,
1651 type,
1652 ^{lck_mtx_lock(lock);},
1653 ^{lck_mtx_unlock(lock);});
1654 }
1655 }
1656
1657 /*
1658 * Name: lck_spin_sleep_with_inheritor
1659 *
1660 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1661 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1662 * be directed to the inheritor specified.
1663 * An interruptible mode and deadline can be specified to return earlier from the wait.
1664 *
1665 * Args:
 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1667 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1668 * Arg3: event to wait on.
1669 * Arg4: thread to propagate the event push to.
1670 * Arg5: interruptible flag for wait.
1671 * Arg6: deadline for wait.
1672 *
1673 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1674 * Lock will be dropped while waiting.
1675 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1676 * wakeup for the event is called.
1677 *
1678 * Returns: result of the wait.
1679 */
1680 wait_result_t
lck_spin_sleep_with_inheritor(lck_spin_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1681 lck_spin_sleep_with_inheritor(
1682 lck_spin_t *lock,
1683 lck_sleep_action_t lck_sleep_action,
1684 event_t event,
1685 thread_t inheritor,
1686 wait_interrupt_t interruptible,
1687 uint64_t deadline)
1688 {
1689 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1690 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1691 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1692 ^{}, ^{ lck_spin_unlock(lock); });
1693 } else {
1694 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1695 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1696 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
1697 }
1698 }
1699
1700 /*
1701 * Name: lck_ticket_sleep_with_inheritor
1702 *
1703 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1704 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1705 * be directed to the inheritor specified.
1706 * An interruptible mode and deadline can be specified to return earlier from the wait.
1707 *
1708 * Args:
 * Arg1: lck_ticket_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1710 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1711 * Arg3: event to wait on.
1712 * Arg4: thread to propagate the event push to.
1713 * Arg5: interruptible flag for wait.
1714 * Arg6: deadline for wait.
1715 *
1716 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1717 * Lock will be dropped while waiting.
1718 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1719 * wakeup for the event is called.
1720 *
1721 * Returns: result of the wait.
1722 */
1723 wait_result_t
lck_ticket_sleep_with_inheritor(lck_ticket_t * lock,lck_grp_t * grp,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1724 lck_ticket_sleep_with_inheritor(
1725 lck_ticket_t *lock,
1726 lck_grp_t *grp,
1727 lck_sleep_action_t lck_sleep_action,
1728 event_t event,
1729 thread_t inheritor,
1730 wait_interrupt_t interruptible,
1731 uint64_t deadline)
1732 {
1733 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1734 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1735 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1736 ^{}, ^{ lck_ticket_unlock(lock); });
1737 } else {
1738 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
1739 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
1740 ^{ lck_ticket_lock(lock, grp); }, ^{ lck_ticket_unlock(lock); });
1741 }
1742 }
1743
1744 /*
1745 * Name: lck_mtx_sleep_with_inheritor
1746 *
1747 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1748 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1749 * be directed to the inheritor specified.
1750 * An interruptible mode and deadline can be specified to return earlier from the wait.
1751 *
1752 * Args:
 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1754 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
1755 * Arg3: event to wait on.
1756 * Arg4: thread to propagate the event push to.
1757 * Arg5: interruptible flag for wait.
1758 * Arg6: deadline for wait.
1759 *
1760 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1761 * Lock will be dropped while waiting.
1762 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1763 * wakeup for the event is called.
1764 *
1765 * Returns: result of the wait.
1766 */
1767 wait_result_t
lck_mtx_sleep_with_inheritor(lck_mtx_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1768 lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
1769 {
1770 return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1771 }
1772
1773 /*
1774 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
1775 */
1776
1777 wait_result_t
lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline,turnstile_type_t type)1778 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1779 {
1780 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
1781
1782 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
1783
1784 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1785 return sleep_with_inheritor_and_turnstile_type(event,
1786 inheritor,
1787 interruptible,
1788 deadline,
1789 type,
1790 ^{;},
1791 ^{lck_rw_type = lck_rw_done(lock);});
1792 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1793 return sleep_with_inheritor_and_turnstile_type(event,
1794 inheritor,
1795 interruptible,
1796 deadline,
1797 type,
1798 ^{lck_rw_lock(lock, lck_rw_type);},
1799 ^{lck_rw_type = lck_rw_done(lock);});
1800 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1801 return sleep_with_inheritor_and_turnstile_type(event,
1802 inheritor,
1803 interruptible,
1804 deadline,
1805 type,
1806 ^{lck_rw_lock_exclusive(lock);},
1807 ^{lck_rw_type = lck_rw_done(lock);});
1808 } else {
1809 return sleep_with_inheritor_and_turnstile_type(event,
1810 inheritor,
1811 interruptible,
1812 deadline,
1813 type,
1814 ^{lck_rw_lock_shared(lock);},
1815 ^{lck_rw_type = lck_rw_done(lock);});
1816 }
1817 }
1818
1819 /*
1820 * Name: lck_rw_sleep_with_inheritor
1821 *
1822 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1823 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1824 * be directed to the inheritor specified.
1825 * An interruptible mode and deadline can be specified to return earlier from the wait.
1826 *
1827 * Args:
 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1829 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
1830 * Arg3: event to wait on.
1831 * Arg4: thread to propagate the event push to.
1832 * Arg5: interruptible flag for wait.
1833 * Arg6: deadline for wait.
1834 *
1835 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1836 * Lock will be dropped while waiting.
1837 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1838 * wakeup for the event is called.
1839 *
1840 * Returns: result of the wait.
1841 */
1842 wait_result_t
lck_rw_sleep_with_inheritor(lck_rw_t * lock,lck_sleep_action_t lck_sleep_action,event_t event,thread_t inheritor,wait_interrupt_t interruptible,uint64_t deadline)1843 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
1844 {
1845 return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
1846 }
1847
1848 /*
1849 * wakeup_with_inheritor functions are independent from the locking primitive.
1850 */
1851
1852 /*
1853 * Name: wakeup_one_with_inheritor
1854 *
1855 * Description: wake up one waiter for event if any. The thread woken up will be the one with the higher sched priority waiting on event.
1856 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
1857 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
1858 *
1859 * Args:
1860 * Arg1: event to wake from.
1861 * Arg2: wait result to pass to the woken up thread.
1862 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
 * Arg4: pointer for storing the thread woken up.
1864 *
1865 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1866 *
 * Conditions: The new inheritor woken up cannot run in user space until another inheritor is specified for the event or a
 *             wakeup for the event is called.
 *             A reference for the woken-up thread is acquired.
1870 * NOTE: this cannot be called from interrupt context.
1871 */
1872 kern_return_t
wakeup_one_with_inheritor(event_t event,wait_result_t result,lck_wake_action_t action,thread_t * thread_wokenup)1873 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
1874 {
1875 return wakeup_with_inheritor_and_turnstile_type(event,
1876 TURNSTILE_SLEEP_INHERITOR,
1877 result,
1878 TRUE,
1879 action,
1880 thread_wokenup);
1881 }
1882
1883 /*
1884 * Name: wakeup_all_with_inheritor
1885 *
1886 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
1887 *
1888 * Args:
1889 * Arg1: event to wake from.
1890 * Arg2: wait result to pass to the woken up threads.
1891 *
1892 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1893 *
1894 * Conditions: NOTE: this cannot be called from interrupt context.
1895 */
1896 kern_return_t
wakeup_all_with_inheritor(event_t event,wait_result_t result)1897 wakeup_all_with_inheritor(event_t event, wait_result_t result)
1898 {
1899 return wakeup_with_inheritor_and_turnstile_type(event,
1900 TURNSTILE_SLEEP_INHERITOR,
1901 result,
1902 FALSE,
1903 0,
1904 NULL);
1905 }
1906
1907 /*
1908 * change_sleep_inheritor is independent from the locking primitive.
1909 */
1910
1911 /*
1912 * Name: change_sleep_inheritor
1913 *
1914 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
1915 *
1916 * Args:
1917 * Arg1: event to redirect the push.
1918 * Arg2: new inheritor for event.
1919 *
1920 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
1921 *
1922 * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
1923 * wakeup for the event is called.
1924 * NOTE: this cannot be called from interrupt context.
1925 */
1926 kern_return_t
change_sleep_inheritor(event_t event,thread_t inheritor)1927 change_sleep_inheritor(event_t event, thread_t inheritor)
1928 {
1929 return change_sleep_inheritor_and_turnstile_type(event,
1930 inheritor,
1931 TURNSTILE_SLEEP_INHERITOR);
1932 }
1933
/*
 * Report the inheritor of a sleep_with_inheritor wait to the kernel
 * debugger (kdp) stackshot machinery, filling waitinfo->owner with the
 * inheritor thread's tid.
 */
void
kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
{
	assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
	assert(waitq_type(waitq) == WQT_TURNSTILE);
	/* Default to "unknown owner" in case we cannot inspect the turnstile. */
	waitinfo->owner = 0;
	waitinfo->context = 0;

	/*
	 * If the waitq interlock is held, the turnstile state may be mid-update;
	 * leave owner/context as 0 rather than reading inconsistent data.
	 */
	if (waitq_held(waitq)) {
		return;
	}

	struct turnstile *turnstile = waitq_to_turnstile(waitq);
	/* sleep_with_inheritor always sets a thread inheritor (see assert). */
	assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
1950
/*
 * Gate state layout: gt_data packs the holder thread pointer with two
 * low bits — the interlock bit and the waiters bit. Thread pointers are
 * sufficiently aligned that the low two bits are always zero.
 */
#define GATE_TYPE 3
#define GATE_ILOCK_BIT 0
#define GATE_WAITERS_BIT 1

#define GATE_ILOCK (1 << GATE_ILOCK_BIT)
#define GATE_WAITERS (1 << GATE_WAITERS_BIT)

/* Interlock: a spin bit lock on the low bit of gt_data. */
#define gate_ilock(gate) hw_lock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT, LCK_GRP_NULL)
#define gate_iunlock(gate) hw_unlock_bit((hw_lock_bit_t*)(&(gate)->gt_data), GATE_ILOCK_BIT)
#define gate_has_waiter_bit(state) ((state & GATE_WAITERS) != 0)
#define ordered_load_gate(gate) os_atomic_load(&(gate)->gt_data, compiler_acq_rel)
#define ordered_store_gate(gate, value) os_atomic_store(&(gate)->gt_data, value, compiler_acq_rel)

/* Strip the two flag bits to recover the holder thread pointer. */
#define GATE_THREAD_MASK (~(uintptr_t)(GATE_ILOCK | GATE_WAITERS))
#define GATE_STATE_TO_THREAD(state) (thread_t)((state) & GATE_THREAD_MASK)
#define GATE_STATE_MASKED(state) (uintptr_t)((state) & GATE_THREAD_MASK)
#define GATE_THREAD_TO_STATE(thread) ((uintptr_t)(thread))

/* Sentinel stored in gt_data (flag bits masked off) once the gate is destroyed. */
#define GATE_DESTROYED GATE_STATE_MASKED(0xdeadbeefdeadbeef)

#define GATE_EVENT(gate) ((event_t) gate)
#define EVENT_TO_GATE(event) ((gate_t *) event)

/* Deferred-work thunk returned by helpers that must run after the interlock drops. */
typedef void (*void_func_void)(void);
1975
1976 __abortlike
1977 static void
gate_verify_tag_panic(gate_t * gate)1978 gate_verify_tag_panic(gate_t *gate)
1979 {
1980 panic("Gate used is invalid. gate %p data %lx turnstile %p refs %d flags %x ", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
1981 }
1982
/*
 * Panic: the gate carries the GATE_DESTROYED sentinel, i.e. it was used
 * after gate_destroy()/gate_free().
 */
__abortlike
static void
gate_verify_destroy_panic(gate_t *gate)
{
	panic("Gate used was destroyed. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
1989
/*
 * Sanity-check a gate before use: panic if the type tag is wrong
 * (uninitialized/corrupted memory) or if the gate was already destroyed.
 */
static void
gate_verify(gate_t *gate)
{
	if (gate->gt_type != GATE_TYPE) {
		gate_verify_tag_panic(gate);
	}
	if (GATE_STATE_MASKED(gate->gt_data) == GATE_DESTROYED) {
		gate_verify_destroy_panic(gate);
	}

	/* A live gate always holds at least the creation reference. */
	assert(gate->gt_refs > 0);
}
2002
/*
 * Panic: an attempt was made to close a gate that is already closed
 * (possibly by the current thread itself).
 */
__abortlike
static void
gate_already_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to close a gate already closed gate %p holder %p current_thread %p", gate, holder, current_thread());
}
2009
2010 static kern_return_t
gate_try_close(gate_t * gate)2011 gate_try_close(gate_t *gate)
2012 {
2013 uintptr_t state;
2014 thread_t holder;
2015 kern_return_t ret;
2016 thread_t thread = current_thread();
2017
2018 gate_verify(gate);
2019
2020 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2021 return KERN_SUCCESS;
2022 }
2023
2024 gate_ilock(gate);
2025 state = ordered_load_gate(gate);
2026 holder = GATE_STATE_TO_THREAD(state);
2027
2028 if (holder == NULL) {
2029 assert(gate_has_waiter_bit(state) == FALSE);
2030
2031 state = GATE_THREAD_TO_STATE(current_thread());
2032 state |= GATE_ILOCK;
2033 ordered_store_gate(gate, state);
2034 ret = KERN_SUCCESS;
2035 } else {
2036 if (holder == current_thread()) {
2037 gate_already_owned_panic(gate, holder);
2038 }
2039 ret = KERN_FAILURE;
2040 }
2041
2042 gate_iunlock(gate);
2043 return ret;
2044 }
2045
2046 static void
gate_close(gate_t * gate)2047 gate_close(gate_t* gate)
2048 {
2049 uintptr_t state;
2050 thread_t holder;
2051 thread_t thread = current_thread();
2052
2053 gate_verify(gate);
2054
2055 if (os_atomic_cmpxchg(&gate->gt_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2056 return;
2057 }
2058
2059 gate_ilock(gate);
2060 state = ordered_load_gate(gate);
2061 holder = GATE_STATE_TO_THREAD(state);
2062
2063 if (holder != NULL) {
2064 gate_already_owned_panic(gate, holder);
2065 }
2066
2067 assert(gate_has_waiter_bit(state) == FALSE);
2068
2069 state = GATE_THREAD_TO_STATE(thread);
2070 state |= GATE_ILOCK;
2071 ordered_store_gate(gate, state);
2072
2073 gate_iunlock(gate);
2074 }
2075
/*
 * Slow path of gate_open(): wake all waiters parked on the gate's
 * turnstile and clear the turnstile inheritor.
 * Called with the gate interlock held.
 */
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/* Wake every thread waiting on this gate's event. */
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	/* The gate is opening: nobody inherits the push anymore. */
	turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}
2095
/*
 * Panic: the current thread tried to open (open == true) or hand off
 * (open == false) a gate it does not hold.
 */
__abortlike
static void
gate_not_owned_panic(gate_t *gate, thread_t holder, bool open)
{
	if (open) {
		panic("Trying to open a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	} else {
		panic("Trying to handoff a gate %p owned by %p from current_thread %p", gate, holder, current_thread());
	}
}
2106
/*
 * Open the gate and wake all waiters, if any.
 * The current thread must be the holder; panics otherwise.
 */
static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	gate_verify(gate);
	/* Fast path: no waiters and no interlock held — CAS back to fully open. */
	if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	/* Slow path: inspect the state under the interlock. */
	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != thread) {
		gate_not_owned_panic(gate, holder, true);
	}

	if (waiters) {
		/* Wake everybody parked on the gate's turnstile. */
		gate_open_turnstile(gate);
	}

	/* Clear holder and waiters; keep only the still-held interlock bit. */
	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
2138
/*
 * Slow path of gate_handoff(): wake the highest-priority waiter and make
 * it the prospective new holder.
 * Called with the gate interlock held.
 *
 * On KERN_SUCCESS, *thread_woken_up is the new holder (with a reference
 * the caller must drop) and *waiters reports whether more waiters remain.
 * Returns KERN_NOT_WAITING if no waiter was found on the turnstile.
 */
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there are more chances that
		 * new waiters will come by, avoiding to do the operation at all.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock.
	 * It is ok because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}
2201
/*
 * Transfer gate ownership to the highest-priority waiter, waking it up.
 * The current thread must be the holder; panics otherwise.
 *
 * Returns KERN_SUCCESS if a waiter became the new holder, or
 * KERN_NOT_WAITING if there were none (in which case the gate is opened
 * only when flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS).
 */
static kern_return_t
gate_handoff(gate_t *gate,
    int flags)
{
	kern_return_t ret;
	thread_t new_holder = NULL;
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
	gate_verify(gate);

	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
		/* Fast path: no waiters/interlock — just open the gate with a CAS. */
		if (os_atomic_cmpxchg(&gate->gt_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
			return KERN_NOT_WAITING;
		}
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder != current_thread()) {
		gate_not_owned_panic(gate, holder, false);
	}

	if (waiters) {
		/* Try to pass ownership to the highest-priority waiter. */
		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
		if (ret == KERN_SUCCESS) {
			state = GATE_THREAD_TO_STATE(new_holder);
			if (waiters) {
				state |= GATE_WAITERS;
			}
		} else {
			/* No waiter actually found on the turnstile. */
			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
				state = 0;
			}
		}
	} else {
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			state = 0;
		}
		ret = KERN_NOT_WAITING;
	}
	/* Preserve the held interlock bit when publishing the new state. */
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);

	/* Drop the reference acquired for us by waitq_wakeup64_identify(). */
	if (new_holder) {
		thread_deallocate(new_holder);
	}
	return ret;
}
2260
/*
 * Slow path of gate_steal(): redirect the turnstile push to
 * new_inheritor (the stealing thread).
 * Called with the gate interlock held; returns the deferred cleanup
 * function the caller must invoke after dropping the interlock.
 */
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}
2279
/*
 * Panic: the current thread tried to wait on (wait == true) or steal
 * (wait == false) a gate that is not closed.
 */
__abortlike
static void
gate_not_closed_panic(gate_t *gate, bool wait)
{
	if (wait) {
		panic("Trying to wait on a not closed gate %p from current_thread %p", gate, current_thread());
	} else {
		panic("Trying to steal a not closed gate %p from current_thread %p", gate, current_thread());
	}
}
2290
/*
 * Forcibly take ownership of a closed gate, making the current thread
 * the holder (the previous holder must not open/handoff afterwards).
 * Panics if the gate is open.
 */
static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	/* Deferred turnstile work to run after the interlock is dropped. */
	void_func_void func_after_interlock_unlock;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiter_bit(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, false);
	}

	/* Become the new holder; the interlock bit stays set until gate_iunlock. */
	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		/* Keep the waiters bit and repoint the turnstile push at us. */
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		/* Chain updates for the old holder must run without the interlock. */
		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
2325
/*
 * Slow path of gate_wait(): park the current thread on the gate's
 * turnstile, pushing on holder, until woken up, interrupted, or timed out.
 * Called with the gate interlock held; re-acquires it before returning.
 *
 * Outputs the thread_block() result in *wait and whether waiters remain
 * in *waiters. Returns the deferred cleanup function the caller must
 * invoke after dropping the interlock.
 */
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t* wait,
    bool* waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->gt_turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	/* Drop the gate interlock before blocking. */
	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||     // thread interrupted or timedout
		    holder == current_thread())) {     // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->gt_turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
2379
/*
 * Return a heap-allocated gate to its zone. Callers must ensure the
 * reference count already dropped to zero.
 */
static void
gate_free_internal(gate_t *gate)
{
	zfree(KT_GATE, gate);
}
2385
/*
 * Panic: gt_refs would overflow (more than UINT16_MAX concurrent
 * references on the gate).
 */
__abortlike
static void
gate_too_many_refs_panic(gate_t *gate)
{
	panic("Too many refs taken on gate. gate %p data %lx turnstile %p refs %d flags %x", gate, gate->gt_data, gate->gt_turnstile, gate->gt_refs, gate->gt_flags);
}
2392
/*
 * Wait until the current thread becomes the gate holder or the gate
 * opens; interruptible and bounded by deadline.
 *
 * primitive_unlock/primitive_lock are blocks that drop and re-acquire
 * the external lock protecting the gate (turnstiles cannot use a
 * blocking primitive as interlock).
 *
 * Returns GATE_HANDOFF, GATE_OPENED, GATE_TIMED_OUT or GATE_INTERRUPTED.
 * If the gate was heap-allocated and destroyed while we slept, the last
 * waiter out frees it here.
 */
static gate_wait_result_t
gate_wait(gate_t* gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_verify(gate);

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		gate_not_closed_panic(gate, true);
	}

	/*
	 * Get a ref on the gate so it will not
	 * be freed while we are coming back from the sleep.
	 */
	if (gate->gt_refs == UINT16_MAX) {
		gate_too_many_refs_panic(gate);
	}
	gate->gt_refs++;
	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	/* Sleeps and re-acquires the gate interlock before returning. */
	func_after_interlock_unlock = gate_wait_turnstile(    gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		/* Refresh the waiters bit to match the turnstile's view. */
		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	/* Drop the reference taken before sleeping. */
	assert(gate->gt_refs > 0);
	/* NOTE(review): gt_refs is uint16_t elsewhere (see gate_destroy_internal); uint16_t would be the consistent type here. */
	uint32_t ref = --gate->gt_refs;
	bool to_free = gate->gt_alloc;
	gate_iunlock(gate);

	/* The gate was destroyed while we slept; last waiter out frees it. */
	if (GATE_STATE_MASKED(state) == GATE_DESTROYED) {
		if (to_free == true) {
			assert(!waiters);
			if (ref == 0) {
				gate_free_internal(gate);
			}
			ret = GATE_OPENED;
		} else {
			gate_verify_destroy_panic(gate);
		}
	}

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
2516
2517 static void
gate_assert(gate_t * gate,int flags)2518 gate_assert(gate_t *gate, int flags)
2519 {
2520 uintptr_t state;
2521 thread_t holder;
2522
2523 gate_verify(gate);
2524
2525 gate_ilock(gate);
2526 state = ordered_load_gate(gate);
2527 holder = GATE_STATE_TO_THREAD(state);
2528
2529 switch (flags) {
2530 case GATE_ASSERT_CLOSED:
2531 assert(holder != NULL);
2532 break;
2533 case GATE_ASSERT_OPEN:
2534 assert(holder == NULL);
2535 break;
2536 case GATE_ASSERT_HELD:
2537 assert(holder == current_thread());
2538 break;
2539 default:
2540 panic("invalid %s flag %d", __func__, flags);
2541 }
2542
2543 gate_iunlock(gate);
2544 }
2545
/* Initialization modes for gate_init(): embedded gate vs heap-allocated. */
enum {
	GT_INIT_DEFAULT = 0,
	GT_INIT_ALLOC
};
2550
2551 static void
gate_init(gate_t * gate,uint type)2552 gate_init(gate_t *gate, uint type)
2553 {
2554 bzero(gate, sizeof(gate_t));
2555
2556 gate->gt_data = 0;
2557 gate->gt_turnstile = NULL;
2558 gate->gt_refs = 1;
2559 switch (type) {
2560 case GT_INIT_ALLOC:
2561 gate->gt_alloc = 1;
2562 break;
2563 default:
2564 gate->gt_alloc = 0;
2565 break;
2566 }
2567 gate->gt_type = GATE_TYPE;
2568 gate->gt_flags_pad = 0;
2569 }
2570
2571 static gate_t*
gate_alloc_init(void)2572 gate_alloc_init(void)
2573 {
2574 gate_t *gate;
2575 gate = zalloc_flags(KT_GATE, Z_WAITOK | Z_NOFAIL);
2576 gate_init(gate, GT_INIT_ALLOC);
2577 return gate;
2578 }
2579
/*
 * Panic: a gate was destroyed while still closed (held by holder).
 */
__abortlike
static void
gate_destroy_owned_panic(gate_t *gate, thread_t holder)
{
	panic("Trying to destroy a gate owned by %p. Gate %p", holder, gate);
}
2586
/*
 * Panic: a gate was destroyed while threads were still waiting on it.
 */
__abortlike
static void
gate_destroy_waiter_panic(gate_t *gate)
{
	panic("Trying to destroy a gate with waiters. Gate %p data %lx turnstile %p", gate, gate->gt_data, gate->gt_turnstile);
}
2593
/*
 * Mark the gate destroyed, dropping the creation reference.
 * The gate must be open with no waiters; panics otherwise.
 * Returns the remaining reference count (non-zero if woken-up waiters
 * still need to touch the gate on their way out of gate_wait()).
 */
static uint16_t
gate_destroy_internal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	uint16_t ref;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	/*
	 * The gate must be open
	 * and all the threads must
	 * have been woken up by this time
	 */
	if (holder != NULL) {
		gate_destroy_owned_panic(gate, holder);
	}
	if (gate_has_waiter_bit(state)) {
		gate_destroy_waiter_panic(gate);
	}

	assert(gate->gt_refs > 0);

	/* Drop the creation reference. */
	ref = --gate->gt_refs;

	/*
	 * Mark the gate as destroyed.
	 * The interlock bit still needs
	 * to be available to let the
	 * last woken-up threads clear
	 * the wait.
	 */
	state = GATE_DESTROYED;
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);
	gate_iunlock(gate);
	return ref;
}
2634
/*
 * Panic: gate_destroy() was called on a heap-allocated gate; such gates
 * must go through gate_free() instead.
 */
__abortlike
static void
gate_destroy_panic(gate_t *gate)
{
	panic("Trying to destroy a gate that was allocated by gate_alloc_init(). gate_free() should be used instead, gate %p thread %p", gate, current_thread());
}
2641
2642 static void
gate_destroy(gate_t * gate)2643 gate_destroy(gate_t *gate)
2644 {
2645 gate_verify(gate);
2646 if (gate->gt_alloc == 1) {
2647 gate_destroy_panic(gate);
2648 }
2649 gate_destroy_internal(gate);
2650 }
2651
/*
 * Panic: gate_free() was called on a gate not allocated by
 * gate_alloc_init().
 */
__abortlike
static void
gate_free_panic(gate_t *gate)
{
	panic("Trying to free a gate that was not allocated by gate_alloc_init(), gate %p thread %p", gate, current_thread());
}
2658
/*
 * Destroy and (possibly) free a heap-allocated gate.
 * If woken-up waiters still hold references, the last of them frees the
 * gate on its way out of gate_wait() instead.
 */
static void
gate_free(gate_t *gate)
{
	uint16_t ref;

	gate_verify(gate);

	if (gate->gt_alloc == 0) {
		gate_free_panic(gate);
	}

	ref = gate_destroy_internal(gate);
	/*
	 * Some of the threads waiting on the gate
	 * might still need to run after being woken up.
	 * They will access the gate to cleanup the
	 * state, so we cannot free it.
	 * The last waiter will free the gate in this case.
	 */
	if (ref == 0) {
		gate_free_internal(gate);
	}
}
2682
2683 /*
2684 * Name: lck_rw_gate_init
2685 *
2686 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2687 *
2688 * Args:
2689 * Arg1: lck_rw_t lock used to protect the gate.
2690 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2691 */
2692 void
lck_rw_gate_init(lck_rw_t * lock,gate_t * gate)2693 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2694 {
2695 (void) lock;
2696 gate_init(gate, GT_INIT_DEFAULT);
2697 }
2698
2699 /*
2700 * Name: lck_rw_gate_alloc_init
2701 *
2702 * Description: allocates and initializes a gate_t.
2703 *
2704 * Args:
2705 * Arg1: lck_rw_t lock used to protect the gate.
2706 *
2707 * Returns:
2708 * gate_t allocated.
2709 */
2710 gate_t*
lck_rw_gate_alloc_init(lck_rw_t * lock)2711 lck_rw_gate_alloc_init(lck_rw_t *lock)
2712 {
2713 (void) lock;
2714 return gate_alloc_init();
2715 }
2716
2717 /*
2718 * Name: lck_rw_gate_destroy
2719 *
2720 * Description: destroys a variable previously initialized
2721 * with lck_rw_gate_init().
2722 *
2723 * Args:
2724 * Arg1: lck_rw_t lock used to protect the gate.
2725 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2726 */
2727 void
lck_rw_gate_destroy(lck_rw_t * lock,gate_t * gate)2728 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2729 {
2730 (void) lock;
2731 gate_destroy(gate);
2732 }
2733
2734 /*
2735 * Name: lck_rw_gate_free
2736 *
2737 * Description: destroys and tries to free a gate previously allocated
2738 * with lck_rw_gate_alloc_init().
2739 * The gate free might be delegated to the last thread returning
2740 * from the gate_wait().
2741 *
2742 * Args:
2743 * Arg1: lck_rw_t lock used to protect the gate.
2744 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
2745 */
2746 void
lck_rw_gate_free(lck_rw_t * lock,gate_t * gate)2747 lck_rw_gate_free(lck_rw_t *lock, gate_t *gate)
2748 {
2749 (void) lock;
2750 gate_free(gate);
2751 }
2752
2753 /*
2754 * Name: lck_rw_gate_try_close
2755 *
2756 * Description: Tries to close the gate.
2757 * In case of success the current thread will be set as
2758 * the holder of the gate.
2759 *
2760 * Args:
2761 * Arg1: lck_rw_t lock used to protect the gate.
2762 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2763 *
2764 * Conditions: Lock must be held. Returns with the lock held.
2765 *
2766 * Returns:
2767 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2768 * of the gate.
2769 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2770 * to wake up possible waiters on the gate before returning to userspace.
2771 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2772 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2773 *
2774 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2775 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2776 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2777 * be done without dropping the lock that is protecting the gate in between.
2778 */
2779 int
lck_rw_gate_try_close(__assert_only lck_rw_t * lock,gate_t * gate)2780 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2781 {
2782 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2783
2784 return gate_try_close(gate);
2785 }
2786
2787 /*
2788 * Name: lck_rw_gate_close
2789 *
2790 * Description: Closes the gate. The current thread will be set as
2791 * the holder of the gate. Will panic if the gate is already closed.
2792 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2793 * to wake up possible waiters on the gate before returning to userspace.
2794 *
2795 * Args:
2796 * Arg1: lck_rw_t lock used to protect the gate.
2797 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2798 *
2799 * Conditions: Lock must be held. Returns with the lock held.
2800 * The gate must be open.
2801 *
2802 */
2803 void
lck_rw_gate_close(__assert_only lck_rw_t * lock,gate_t * gate)2804 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2805 {
2806 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2807
2808 return gate_close(gate);
2809 }
2810
2811 /*
2812 * Name: lck_rw_gate_open
2813 *
2814 * Description: Opens the gate and wakes up possible waiters.
2815 *
2816 * Args:
2817 * Arg1: lck_rw_t lock used to protect the gate.
2818 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2819 *
2820 * Conditions: Lock must be held. Returns with the lock held.
2821 * The current thread must be the holder of the gate.
2822 *
2823 */
2824 void
lck_rw_gate_open(__assert_only lck_rw_t * lock,gate_t * gate)2825 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2826 {
2827 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2828
2829 gate_open(gate);
2830 }
2831
2832 /*
2833 * Name: lck_rw_gate_handoff
2834 *
2835 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2836 * priority will be selected as the new holder of the gate, and woken up,
2837 * with the gate remaining in the closed state throughout.
2838 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2839 * will be returned.
2840 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2841 * case no waiters were found.
2842 *
2843 *
2844 * Args:
2845 * Arg1: lck_rw_t lock used to protect the gate.
2846 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2847 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2848 *
2849 * Conditions: Lock must be held. Returns with the lock held.
2850 * The current thread must be the holder of the gate.
2851 *
2852 * Returns:
2853 * KERN_SUCCESS in case one of the waiters became the new holder.
2854 * KERN_NOT_WAITING in case there were no waiters.
2855 *
2856 */
2857 kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t * lock,gate_t * gate,gate_handoff_flags_t flags)2858 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, gate_handoff_flags_t flags)
2859 {
2860 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2861
2862 return gate_handoff(gate, flags);
2863 }
2864
2865 /*
2866 * Name: lck_rw_gate_steal
2867 *
2868 * Description: Set the current ownership of the gate. It sets the current thread as the
2869 * new holder of the gate.
2870 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2871 * to wake up possible waiters on the gate before returning to userspace.
2872 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2873 * anymore.
2874 *
2875 *
2876 * Args:
2877 * Arg1: lck_rw_t lock used to protect the gate.
2878 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2879 *
2880 * Conditions: Lock must be held. Returns with the lock held.
2881 * The gate must be closed and the current thread must not already be the holder.
2882 *
2883 */
2884 void
lck_rw_gate_steal(__assert_only lck_rw_t * lock,gate_t * gate)2885 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2886 {
2887 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2888
2889 gate_steal(gate);
2890 }
2891
2892 /*
2893 * Name: lck_rw_gate_wait
2894 *
2895 * Description: Waits for the current thread to become the holder of the gate or for the
2896 * gate to become open. An interruptible mode and deadline can be specified
2897 * to return earlier from the wait.
2898 *
2899 * Args:
2900 * Arg1: lck_rw_t lock used to protect the gate.
2901 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2902 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
2903 * Arg3: interruptible flag for wait.
2904 * Arg4: deadline
2905 *
2906 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2907 * Lock will be dropped while waiting.
2908 * The gate must be closed.
2909 *
2910 * Returns: Reason why the thread was woken up.
2911 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2912 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on.
2913 * to wake up possible waiters on the gate before returning to userspace.
2914 * GATE_OPENED - the gate was opened by the holder.
2915 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2916 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2917 */
2918 gate_wait_result_t
lck_rw_gate_wait(lck_rw_t * lock,gate_t * gate,lck_sleep_action_t lck_sleep_action,wait_interrupt_t interruptible,uint64_t deadline)2919 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2920 {
2921 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2922
2923 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2924
2925 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2926 return gate_wait(gate,
2927 interruptible,
2928 deadline,
2929 ^{lck_rw_type = lck_rw_done(lock);},
2930 ^{;});
2931 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2932 return gate_wait(gate,
2933 interruptible,
2934 deadline,
2935 ^{lck_rw_type = lck_rw_done(lock);},
2936 ^{lck_rw_lock(lock, lck_rw_type);});
2937 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2938 return gate_wait(gate,
2939 interruptible,
2940 deadline,
2941 ^{lck_rw_type = lck_rw_done(lock);},
2942 ^{lck_rw_lock_exclusive(lock);});
2943 } else {
2944 return gate_wait(gate,
2945 interruptible,
2946 deadline,
2947 ^{lck_rw_type = lck_rw_done(lock);},
2948 ^{lck_rw_lock_shared(lock);});
2949 }
2950 }
2951
2952 /*
2953 * Name: lck_rw_gate_assert
2954 *
2955 * Description: asserts that the gate is in the specified state.
2956 *
2957 * Args:
2958 * Arg1: lck_rw_t lock used to protect the gate.
2959 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2960 * Arg3: flags to specified assert type.
2961 * GATE_ASSERT_CLOSED - the gate is currently closed
2962 * GATE_ASSERT_OPEN - the gate is currently opened
2963 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2964 */
2965 void
lck_rw_gate_assert(__assert_only lck_rw_t * lock,gate_t * gate,gate_assert_flags_t flags)2966 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, gate_assert_flags_t flags)
2967 {
2968 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2969
2970 gate_assert(gate, flags);
2971 return;
2972 }
2973
2974 /*
2975 * Name: lck_mtx_gate_init
2976 *
2977 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2978 *
2979 * Args:
2980 * Arg1: lck_mtx_t lock used to protect the gate.
2981 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2982 */
2983 void
lck_mtx_gate_init(lck_mtx_t * lock,gate_t * gate)2984 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2985 {
2986 (void) lock;
2987 gate_init(gate, GT_INIT_DEFAULT);
2988 }
2989
2990 /*
2991 * Name: lck_mtx_gate_alloc_init
2992 *
2993 * Description: allocates and initializes a gate_t.
2994 *
2995 * Args:
2996 * Arg1: lck_mtx_t lock used to protect the gate.
2997 *
2998 * Returns:
2999 * gate_t allocated.
3000 */
3001 gate_t*
lck_mtx_gate_alloc_init(lck_mtx_t * lock)3002 lck_mtx_gate_alloc_init(lck_mtx_t *lock)
3003 {
3004 (void) lock;
3005 return gate_alloc_init();
3006 }
3007
3008 /*
3009 * Name: lck_mtx_gate_destroy
3010 *
3011 * Description: destroys a variable previously initialized
3012 * with lck_mtx_gate_init().
3013 *
3014 * Args:
3015 * Arg1: lck_mtx_t lock used to protect the gate.
3016 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3017 */
3018 void
lck_mtx_gate_destroy(lck_mtx_t * lock,gate_t * gate)3019 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3020 {
3021 (void) lock;
3022 gate_destroy(gate);
3023 }
3024
3025 /*
3026 * Name: lck_mtx_gate_free
3027 *
3028 * Description: destroys and tries to free a gate previously allocated
3029 * with lck_mtx_gate_alloc_init().
3030 * The gate free might be delegated to the last thread returning
3031 * from the gate_wait().
3032 *
3033 * Args:
3034 * Arg1: lck_mtx_t lock used to protect the gate.
3035 * Arg2: pointer to the gate obtained with lck_rw_gate_alloc_init().
3036 */
3037 void
lck_mtx_gate_free(lck_mtx_t * lock,gate_t * gate)3038 lck_mtx_gate_free(lck_mtx_t *lock, gate_t *gate)
3039 {
3040 (void) lock;
3041 gate_free(gate);
3042 }
3043
3044 /*
3045 * Name: lck_mtx_gate_try_close
3046 *
3047 * Description: Tries to close the gate.
3048 * In case of success the current thread will be set as
3049 * the holder of the gate.
3050 *
3051 * Args:
3052 * Arg1: lck_mtx_t lock used to protect the gate.
3053 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3054 *
3055 * Conditions: Lock must be held. Returns with the lock held.
3056 *
3057 * Returns:
3058 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3059 * of the gate.
3060 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3061 * to wake up possible waiters on the gate before returning to userspace.
3062 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3063 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3064 *
3065 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3066 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3067 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3068 * be done without dropping the lock that is protecting the gate in between.
3069 */
3070 int
lck_mtx_gate_try_close(__assert_only lck_mtx_t * lock,gate_t * gate)3071 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3072 {
3073 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3074
3075 return gate_try_close(gate);
3076 }
3077
3078 /*
3079 * Name: lck_mtx_gate_close
3080 *
3081 * Description: Closes the gate. The current thread will be set as
3082 * the holder of the gate. Will panic if the gate is already closed.
3083 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3084 * to wake up possible waiters on the gate before returning to userspace.
3085 *
3086 * Args:
3087 * Arg1: lck_mtx_t lock used to protect the gate.
3088 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3089 *
3090 * Conditions: Lock must be held. Returns with the lock held.
3091 * The gate must be open.
3092 *
3093 */
3094 void
lck_mtx_gate_close(__assert_only lck_mtx_t * lock,gate_t * gate)3095 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3096 {
3097 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3098
3099 return gate_close(gate);
3100 }
3101
3102 /*
3103 * Name: lck_mtx_gate_open
3104 *
3105 * Description: Opens of the gate and wakes up possible waiters.
3106 *
3107 * Args:
3108 * Arg1: lck_mtx_t lock used to protect the gate.
3109 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3110 *
3111 * Conditions: Lock must be held. Returns with the lock held.
3112 * The current thread must be the holder of the gate.
3113 *
3114 */
3115 void
lck_mtx_gate_open(__assert_only lck_mtx_t * lock,gate_t * gate)3116 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3117 {
3118 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3119
3120 gate_open(gate);
3121 }
3122
3123 /*
3124 * Name: lck_mtx_gate_handoff
3125 *
3126 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3127 * priority will be selected as the new holder of the gate, and woken up,
3128 * with the gate remaining in the closed state throughout.
3129 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3130 * will be returned.
3131 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3132 * case no waiters were found.
3133 *
3134 *
3135 * Args:
3136 * Arg1: lck_mtx_t lock used to protect the gate.
3137 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3138 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3139 *
3140 * Conditions: Lock must be held. Returns with the lock held.
3141 * The current thread must be the holder of the gate.
3142 *
3143 * Returns:
3144 * KERN_SUCCESS in case one of the waiters became the new holder.
3145 * KERN_NOT_WAITING in case there were no waiters.
3146 *
3147 */
3148 kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t * lock,gate_t * gate,gate_handoff_flags_t flags)3149 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, gate_handoff_flags_t flags)
3150 {
3151 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3152
3153 return gate_handoff(gate, flags);
3154 }
3155
3156 /*
3157 * Name: lck_mtx_gate_steal
3158 *
3159 * Description: Steals the ownership of the gate. It sets the current thread as the
3160 * new holder of the gate.
3161 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3162 * to wake up possible waiters on the gate before returning to userspace.
3163 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3164 * anymore.
3165 *
3166 *
3167 * Args:
3168 * Arg1: lck_mtx_t lock used to protect the gate.
3169 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3170 *
3171 * Conditions: Lock must be held. Returns with the lock held.
3172 * The gate must be closed and the current thread must not already be the holder.
3173 *
3174 */
3175 void
lck_mtx_gate_steal(__assert_only lck_mtx_t * lock,gate_t * gate)3176 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3177 {
3178 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3179
3180 gate_steal(gate);
3181 }
3182
3183 /*
3184 * Name: lck_mtx_gate_wait
3185 *
3186 * Description: Waits for the current thread to become the holder of the gate or for the
3187 * gate to become open. An interruptible mode and deadline can be specified
3188 * to return earlier from the wait.
3189 *
3190 * Args:
3191 * Arg1: lck_mtx_t lock used to protect the gate.
3192 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3193 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3194 * Arg3: interruptible flag for wait.
3195 * Arg4: deadline
3196 *
3197 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3198 * Lock will be dropped while waiting.
3199 * The gate must be closed.
3200 *
3201 * Returns: Reason why the thread was woken up.
3202 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3203 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3204 * to wake up possible waiters on the gate before returning to userspace.
3205 * GATE_OPENED - the gate was opened by the holder.
3206 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3207 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3208 */
3209 gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t * lock,gate_t * gate,lck_sleep_action_t lck_sleep_action,wait_interrupt_t interruptible,uint64_t deadline)3210 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3211 {
3212 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3213
3214 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3215 return gate_wait(gate,
3216 interruptible,
3217 deadline,
3218 ^{lck_mtx_unlock(lock);},
3219 ^{;});
3220 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3221 return gate_wait(gate,
3222 interruptible,
3223 deadline,
3224 ^{lck_mtx_unlock(lock);},
3225 ^{lck_mtx_lock_spin(lock);});
3226 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3227 return gate_wait(gate,
3228 interruptible,
3229 deadline,
3230 ^{lck_mtx_unlock(lock);},
3231 ^{lck_mtx_lock_spin_always(lock);});
3232 } else {
3233 return gate_wait(gate,
3234 interruptible,
3235 deadline,
3236 ^{lck_mtx_unlock(lock);},
3237 ^{lck_mtx_lock(lock);});
3238 }
3239 }
3240
3241 /*
3242 * Name: lck_mtx_gate_assert
3243 *
3244 * Description: asserts that the gate is in the specified state.
3245 *
3246 * Args:
3247 * Arg1: lck_mtx_t lock used to protect the gate.
3248 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3249 * Arg3: flags to specified assert type.
3250 * GATE_ASSERT_CLOSED - the gate is currently closed
3251 * GATE_ASSERT_OPEN - the gate is currently opened
3252 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3253 */
3254 void
lck_mtx_gate_assert(__assert_only lck_mtx_t * lock,gate_t * gate,gate_assert_flags_t flags)3255 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, gate_assert_flags_t flags)
3256 {
3257 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3258
3259 gate_assert(gate, flags);
3260 }
3261
3262 #pragma mark - LCK_*_DECLARE support
3263
3264 __startup_func
3265 void
lck_spin_startup_init(struct lck_spin_startup_spec * sp)3266 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3267 {
3268 lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3269 }
3270
3271 __startup_func
3272 void
lck_mtx_startup_init(struct lck_mtx_startup_spec * sp)3273 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3274 {
3275 if (sp->lck_ext) {
3276 lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
3277 } else {
3278 lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3279 }
3280 }
3281
3282 __startup_func
3283 void
lck_rw_startup_init(struct lck_rw_startup_spec * sp)3284 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3285 {
3286 lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3287 }
3288
3289 __startup_func
3290 void
usimple_lock_startup_init(struct usimple_lock_startup_spec * sp)3291 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3292 {
3293 simple_lock_init(sp->lck, sp->lck_init_arg);
3294 }
3295