/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
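
/*
 * Example (illustrative sketch only, not part of this file): a minimal
 * userspace unfair lock built on the ulock syscalls declared in sys/ulock.h.
 * The lock word holds the owner's mach thread port name so the kernel can
 * resolve and promote the owner; real implementations (e.g. libplatform's
 * os_unfair_lock) additionally keep flag bits in the low bits of the value.
 *
 *	#include <mach/mach.h>
 *	#include <pthread.h>
 *	#include <stdatomic.h>
 *
 *	static _Atomic uint32_t lock_word;  // 0 == unlocked
 *
 *	static void
 *	example_lock(void)
 *	{
 *		uint32_t self = pthread_mach_thread_np(pthread_self());
 *		uint32_t expected = 0;
 *		while (!atomic_compare_exchange_strong(&lock_word, &expected, self)) {
 *			// Sleep until the word changes from the value we saw;
 *			// the kernel promotes the owner named by 'expected'.
 *			__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO,
 *			    &lock_word, expected, 0);  // 0 == no timeout
 *			expected = 0;
 *		}
 *	}
 *
 *	static void
 *	example_unlock(void)
 *	{
 *		atomic_store(&lock_word, 0);
 *		// Wake (and hand the inheritor role to) the highest-priority
 *		// waiter, if any; errors when nobody is waiting are ignorable.
 *		__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock_word, 0);
 *	}
 */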

static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");

typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)

#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t     ulk_addr;
			pid_t           ulk_pid;
		};
		struct __attribute__((packed)) {
			uint64_t        ulk_object;
			uint64_t        ulk_offset;
		};
	};
	ulk_type        ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN (sizeof(user_addr_t) + sizeof(pid_t))
#define ULK_XPROC_LEN (sizeof(uint64_t) + sizeof(uint64_t))
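
/*
 * The explicit key lengths cover only the union payload above (the packed
 * attribute removes internal padding), so hashing a key inspects exactly the
 * bytes that ull_key_match() compares; ulk_key_type itself stays out of the
 * hash.
 */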

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_key_type != b->ulk_key_type) {
		return false;
	}

	if (a->ulk_key_type == ULK_UADDR) {
		return (a->ulk_pid == b->ulk_pid) &&
		       (a->ulk_addr == b->ulk_addr);
	}

	assert(a->ulk_key_type == ULK_XPROC);
	return (a->ulk_object == b->ulk_object) &&
	       (a->ulk_offset == b->ulk_offset);
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t        ull_owner; /* holds +1 thread reference */
	ulk_t           ull_key;
	ull_lock_t      ull_lock;
	uint            ull_bucket_index;
	int32_t         ull_nwaiters;
	int32_t         ull_refcount;
	uint8_t         ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t   ull_hash_link;
} ull_t;

#define ULL_MUST_EXIST 0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
	lck_spin_t   ulb_lock;
} ull_bucket_t;

static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
static uint32_t ull_nzalloc = 0;
static ZONE_DECLARE(ull_zone, "ulocks", sizeof(ull_t), ZC_CACHING);

#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)

static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
	uint32_t hash = os_hash_jenkins(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)

static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
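
/*
 * Worked example of the sizing above (illustrative): with thread_max = 10000,
 * the next power of two is 16384, so ull_hash_buckets = 16384 / 4 = 4096,
 * which satisfies the assert that 4096 >= 10000 / 4.
 */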

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * If pid==0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
	int count = 0;
	if (pid == 0) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (pid == 0) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (pid == 0) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc(ull_zone);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notowned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If the ULL_MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}
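
/*
 * Reference-count lifecycle: ull_alloc() hands back a ull with one reference,
 * which is consumed by membership in the hash table. Every ull_get() takes an
 * extra reference that a later ull_put() drops. When the last waiter leaves
 * (see ulock_wait_cleanup), the key is invalidated and the hash table's
 * reference is dropped, so the following ull_put() unhashes and frees the ull.
 */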

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

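/*
 * Resolve a user address to a (VM object id, offset) pair via
 * vm_map_page_info(). Distinct processes mapping the same shared memory see
 * the same pair, which is what lets ULK_XPROC keys match across address
 * spaces.
 */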
static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}
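
/*
 * Note: for UL_UNFAIR_LOCK, userspace stores the owner's mach thread port
 * name in the lock value; ulock_owner_value_to_port_name() (sys/ulock.h)
 * recovers the name, since userspace reserves the low bits of the value for
 * its own flag bits.
 */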

int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	struct ulock_wait2_args args2;

	args2.operation = args->operation;
	args2.addr = args->addr;
	args2.value = args->value;
	args2.timeout = (uint64_t)(args->timeout) * NSEC_PER_USEC;
	args2.value2 = 0;

	return ulock_wait2(p, &args2, retval);
}

int
ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_pid = proc_getpid(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once.
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == THREAD_NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	if (timeout) {
		nanoseconds_to_deadline(timeout, &deadline);
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	thread_t self = current_thread();
	uthread_t uthread = (uthread_t)get_bsdthread_info(self);
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}

int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;

#if DEVELOPMENT || DEBUG
	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_getpid(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(0);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (args->addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_pid = proc_getpid(p);
		key.ulk_addr = args->addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * That owner already knows the lock is contended
			 * and will redrive wakes; just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		if (set_owner) {
			turnstile_update_inheritor(ts, THREAD_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		}
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, 0);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Needs to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}