1 /*
2 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <machine/atomic.h>
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/ioctl.h>
34 #include <sys/file_internal.h>
35 #include <sys/proc_internal.h>
36 #include <sys/kernel.h>
37 #include <sys/guarded.h>
38 #include <sys/stat.h>
39 #include <sys/malloc.h>
40 #include <sys/sysproto.h>
41 #include <sys/pthread_shims.h>
42
43 #include <mach/mach_types.h>
44
45 #include <kern/cpu_data.h>
46 #include <kern/mach_param.h>
47 #include <kern/kern_types.h>
48 #include <kern/assert.h>
49 #include <kern/zalloc.h>
50 #include <kern/thread.h>
51 #include <kern/clock.h>
52 #include <kern/ledger.h>
53 #include <kern/policy_internal.h>
54 #include <kern/task.h>
55 #include <kern/telemetry.h>
56 #include <kern/waitq.h>
57 #include <kern/sched_prim.h>
58 #include <kern/turnstile.h>
59 #include <kern/zalloc.h>
60 #include <kern/debug.h>
61
62 #include <vm/vm_map_xnu.h>
63
64 #include <pexpert/pexpert.h>
65
66 #define XNU_TEST_BITMAP
67 #include <kern/bits.h>
68
69 #include <os/hash.h>
70 #include <sys/ulock.h>
71
72 /*
73 * How ulock promotion works:
74 *
75 * There’s a requested policy field on every thread called ‘promotions’, which
76 * expresses which ulock promotions are happening to this thread.
77 * The promotion priority saturates until the promotion count goes to 0.
78 *
79 * We also track effective promotion qos, which is the qos before clamping.
80 * This value is used for promoting a thread that another thread is waiting on,
81 * so that the lock owner reinflates to the right priority after unclamping.
82 *
83 * This also works for non-QoS threads, which can donate base priority to QoS
84 * and non-QoS threads alike.
85 *
86 * ulock wait applies a promotion to the owner communicated through
87 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
88 * there is still an owner. In ulock wake, if the waker is still the owner,
89 * then it clears its ownership and drops the boost. It does NOT transfer
90 * ownership/priority boost to the new thread. Instead, it selects the
91 * waiting thread with the highest base priority to be woken next, and
92 * relies on that thread to carry the torch for the other waiting threads.
93 */
94
95 static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");
96
97 #if XNU_TARGET_OS_XR
98 #define ULL_TICKET_LOCK 1
99 #endif /* XNU_TARGET_OS_XR */
100
101 #if ULL_TICKET_LOCK
102 typedef lck_ticket_t ull_lock_t;
103 #define ull_lock_init(ull) lck_ticket_init(&ull->ull_lock, &ull_lck_grp)
104 #define ull_lock_destroy(ull) lck_ticket_destroy(&ull->ull_lock, &ull_lck_grp)
105 #define ull_lock(ull) lck_ticket_lock(&ull->ull_lock, &ull_lck_grp)
106 #define ull_unlock(ull) lck_ticket_unlock(&ull->ull_lock)
107 #define ull_assert_owned(ull) lck_ticket_assert_owned(&ull->ull_lock)
108 #define ull_assert_notwned(ull) lck_ticket_assert_not_owned(&ull->ull_lock)
109 #else
110 typedef lck_spin_t ull_lock_t;
111 #define ull_lock_init(ull) lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
112 #define ull_lock_destroy(ull) lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
113 #define ull_lock(ull) lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
114 #define ull_unlock(ull) lck_spin_unlock(&ull->ull_lock)
115 #define ull_assert_owned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
116 #define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
117 #endif /* ULL_TICKET_LOCK */
118
119 #define ULOCK_TO_EVENT(ull) ((event_t)ull)
120 #define EVENT_TO_ULOCK(event) ((ull_t *)event)
121
/*
 * Discriminator for the two flavors of ulock hash key:
 * ULK_UADDR keys a lock by (task, user address) for intra-process locks;
 * ULK_XPROC keys it by (VM object id, offset) so shared-memory locks
 * match across processes.
 */
typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;
127
/*
 * Hash key identifying a ulock. The active union arm is selected by
 * ulk_key_type; both arms are packed so the raw key bytes can be fed
 * directly to the hash (see ULK_UADDR_LEN / ULK_XPROC_LEN).
 */
typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t ulk_addr;
			/*
			 * We use the task address as a hashing key,
			 * so that ulock wakes across exec can't
			 * be confused.
			 */
			task_t ulk_task __kernel_data_semantics;
		};
		struct __attribute__((packed)) {
			uint64_t ulk_object;
			uint64_t ulk_offset;
		};
	};
	ulk_type ulk_key_type;
} ulk_t;
146
147 #define ULK_UADDR_LEN (sizeof(user_addr_t) + sizeof(task_t))
148 #define ULK_XPROC_LEN (sizeof(uint64_t) + sizeof(uint64_t))
149
150 inline static bool
ull_key_match(ulk_t * a,ulk_t * b)151 ull_key_match(ulk_t *a, ulk_t *b)
152 {
153 if (a->ulk_key_type != b->ulk_key_type) {
154 return false;
155 }
156
157 if (a->ulk_key_type == ULK_UADDR) {
158 return (a->ulk_task == b->ulk_task) &&
159 (a->ulk_addr == b->ulk_addr);
160 }
161
162 assert(a->ulk_key_type == ULK_XPROC);
163 return (a->ulk_object == b->ulk_object) &&
164 (a->ulk_offset == b->ulk_offset);
165 }
166
typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t ull_owner; /* holds +1 thread reference */
	ulk_t ull_key;                   /* hash key; zeroed when the last waiter leaves */
	ull_lock_t ull_lock;             /* interlock protecting this structure */
	uint ull_bucket_index;           /* index into ull_bucket[], cached at alloc time */
	int32_t ull_nwaiters;            /* number of threads currently waiting here */
	int32_t ull_refcount;            /* lookups/waiters; unhashed and freed at zero */
	uint8_t ull_opcode;              /* UL_* opcode this ull is in use with (0 = unset) */
	struct turnstile *ull_turnstile; /* priority donation from waiters to owner */
	queue_chain_t ull_hash_link;     /* linkage in ull_bucket[ull_bucket_index] */
} ull_t;
182
183 #define ULL_MUST_EXIST 0x0001
184 static void ull_put(ull_t *);
185
186 static uint32_t ulock_adaptive_spin_usecs = 20;
187
188 SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
189 &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
190
191 #if DEVELOPMENT || DEBUG
192 static int ull_simulate_copyin_fault = 0;
193
/*
 * (DEVELOPMENT || DEBUG only)
 * Dump one ull_t's fields via kprintf, decoding the key
 * according to its type.
 */
static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
219 #endif
220
/*
 * One hash bucket: the queue of ull_t entries hashing here, plus the
 * lock protecting that queue. The lock flavor matches the per-ull lock
 * (ticket vs. spin, per ULL_TICKET_LOCK).
 */
typedef struct ull_bucket {
	queue_head_t ulb_head;
#if ULL_TICKET_LOCK
	lck_ticket_t ulb_lock;
#else
	lck_spin_t ulb_lock;
#endif /* ULL_TICKET_LOCK */
} ull_bucket_t;
229
230 static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
231 static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
232 static uint32_t ull_nzalloc = 0;
233 static KALLOC_TYPE_DEFINE(ull_zone, ull_t, KT_DEFAULT);
234
235 #if ULL_TICKET_LOCK
236 #define ull_bucket_lock(i) lck_ticket_lock(&ull_bucket[i].ulb_lock, &ull_lck_grp)
237 #define ull_bucket_unlock(i) lck_ticket_unlock(&ull_bucket[i].ulb_lock)
238 #else
239 #define ull_bucket_lock(i) lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
240 #define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
241 #endif /* ULL_TICKET_LOCK */
242 static __inline__ uint32_t
ull_hash_index(const void * key,size_t length)243 ull_hash_index(const void *key, size_t length)
244 {
245 uint32_t hash = os_hash_jenkins(key, length);
246
247 hash &= (ull_hash_buckets - 1);
248
249 return hash;
250 }
251
252 #define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
253
/*
 * Boot-time initialization of the ulock hash table.
 * Sizes the bucket array from thread_max, rounded so that the bucket
 * count is a power of two (required by ull_hash_index's mask), then
 * initializes each bucket's queue and lock.
 */
static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	/* permanent allocation: the table lives for the life of the system */
	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
#if ULL_TICKET_LOCK
		lck_ticket_init(&ull_bucket[i].ulb_lock, &ull_lck_grp);
#else
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
#endif /* ULL_TICKET_LOCK */
	}
}
279 STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
280
281 #if DEVELOPMENT || DEBUG
282 /* Count the number of hash entries for a given task address.
283 * if task==0, dump the whole table.
284 */
/*
 * (DEVELOPMENT || DEBUG only)
 * Walk every bucket, dumping (and counting) entries whose ULK_UADDR key
 * matches 'task'. With task == TASK_NULL, dumps the entire table and
 * resets the allocation counter afterwards.
 * Returns the number of entries dumped.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
316 #endif
317
318 static ull_t *
ull_alloc(ulk_t * key)319 ull_alloc(ulk_t *key)
320 {
321 ull_t *ull = (ull_t *)zalloc_flags(ull_zone, Z_SET_NOTSHARED);
322 assert(ull != NULL);
323
324 ull->ull_refcount = 1;
325 ull->ull_key = *key;
326 ull->ull_bucket_index = ULL_INDEX(key);
327 ull->ull_nwaiters = 0;
328 ull->ull_opcode = 0;
329
330 ull->ull_owner = THREAD_NULL;
331 ull->ull_turnstile = TURNSTILE_NULL;
332
333 ull_lock_init(ull);
334
335 ull_nzalloc++;
336 return ull;
337 }
338
/*
 * Destroy and free a ull_t.
 * The caller must already have unhashed it (or never inserted it), and
 * it must carry no lingering state: no owner reference, no turnstile,
 * and its lock must not be held.
 */
static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notwned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}
351
/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked
 *
 * Takes a +1 refcount on the returned ull. When an existing entry is
 * found without ULL_MUST_EXIST, the speculative allocation is handed
 * back through *unused_ull for the caller to free outside the locks.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	/* allocate speculatively before taking the bucket lock */
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		/* found an existing entry: return the unused allocation */
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}
405
/*
 * Drop one reference on a ull (and always drop its lock).
 * Must be called with ull_lock held.
 * On the final reference — legal only after the key has been
 * invalidated — the entry is unhashed and freed; at that point nothing
 * else can look it up, so touching it after ull_unlock is safe.
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	/* the last ref may only go away once the key was zeroed */
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}
427
428
429 extern boolean_t machine_thread_on_core(thread_t thread);
430
/*
 * Resolve a user address in the current map to a (VM object id, offset)
 * pair, used to build cross-process (ULK_XPROC) keys for shared locks.
 * Either out-parameter may be NULL if the caller doesn't need it.
 * Returns 0 on success, EINVAL if the page info lookup fails.
 */
static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}
451
452 static void ulock_wait_continue(void *, wait_result_t);
453 static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
454
455 inline static int
wait_result_to_return_code(wait_result_t wr)456 wait_result_to_return_code(wait_result_t wr)
457 {
458 int ret = 0;
459
460 switch (wr) {
461 case THREAD_AWAKENED:
462 break;
463 case THREAD_TIMED_OUT:
464 ret = ETIMEDOUT;
465 break;
466 case THREAD_INTERRUPTED:
467 case THREAD_RESTART:
468 default:
469 ret = EINTR;
470 break;
471 }
472
473 return ret;
474 }
475
/*
 * Translate a ulock value's embedded owner port name into a thread
 * reference. On success, *owner holds a +1 reference on the owning
 * thread and 0 is returned. On translation failure returns ESRCH if
 * the name was MACH_PORT_DEAD, EOWNERDEAD otherwise.
 */
static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}
493
494 int
sys_ulock_wait(struct proc * p,struct ulock_wait_args * args,int32_t * retval)495 sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
496 {
497 struct ulock_wait2_args args2;
498
499 args2.operation = args->operation;
500 args2.addr = args->addr;
501 args2.value = args->value;
502 args2.timeout = (uint64_t)(args->timeout) * NSEC_PER_USEC;
503 args2.value2 = 0;
504
505 return sys_ulock_wait2(p, &args2, retval);
506 }
507
/*
 * ulock_wait2 system call.
 *
 * Block until the lock word at args->addr no longer contains
 * args->value, or the (optional) timeout expires. For UL_UNFAIR_LOCK,
 * the owner thread encoded in the lock value is resolved and boosted
 * through a turnstile while we wait (see the block comment at the top
 * of this file). On success *retval is the number of waiters remaining
 * after this one leaves; with ULF_NO_ERRNO, errors are reported as a
 * negative *retval instead of an errno.
 */
int
sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	/* decode the opcode into lock width / sharing / ownership semantics */
	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	/* the lock word must be non-NULL and naturally aligned */
	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = proc_task(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once,
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	/* a ull may only ever be used with one opcode */
	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *      i) holding the user lock or
		 *      ii) has just unlocked the user lock after I looked
		 *              and is heading toward the kernel to call ull_wake.
		 *              If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	if (timeout) {
		if (flags & ULF_DEADLINE) {
			deadline = timeout;
		} else {
			nanoseconds_to_deadline(timeout, &deadline);
		}
	}

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		/* stash everything the continuation will need after we block */
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			/* hand off the remainder of our quantum to the owner */
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
805
/*
 * Common teardown after a ulock wait completes, fails, or is
 * interrupted: decrement the waiter count (reporting the remainder
 * through *retval), invalidate the ull when the last waiter leaves,
 * and drop all thread references accumulated during the wait.
 *
 * Must be called with ull_lock held; the lock is dropped via ull_put.
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		/* invalidate the key so ull_put may release the last ref */
		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Need to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}
849
/*
 * Continuation run when a thread blocked in sys_ulock_wait2 resumes.
 * Recovers the wait state stashed in the uthread before blocking,
 * completes the turnstile, performs the common cleanup, and returns
 * to userspace.
 */
__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	uthread_t uthread = current_uthread();
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	/* mirror the retval munging done on the non-continuation path */
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}
877
/*
 * ulock_wake system call: thin wrapper around ulock_wake().
 * On DEVELOPMENT/DEBUG kernels it also services the debug opcodes
 * (hash dumps and copyin fault injection) without touching any lock.
 * Applies ULF_NO_ERRNO munging to the result.
 */
int
sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
{
	int ret = 0;
#if DEVELOPMENT || DEBUG
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);

	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_task(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(TASK_NULL);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif
	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);

	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	return ret;
}
905
/*
 * Wake waiter(s) on the ulock identified by (task, addr), or by its
 * backing VM object for shared opcodes.
 *
 * Flag behavior:
 *  - ULF_WAKE_THREAD: wake only the thread named by wake_value;
 *    EALREADY if that thread isn't waiting here.
 *  - ULF_WAKE_ALL: wake every waiter.
 *  - otherwise, for UL_UNFAIR_LOCK: wake the highest-priority waiter
 *    and record it as the new known owner / turnstile inheritor.
 *  - otherwise: wake a single waiter.
 *
 * Returns 0 or an errno; callers are responsible for ULF_NO_ERRNO
 * munging.
 */
int
ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
{
	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
	uint flags = operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	/* waking a specific thread is incompatible with wake-all and ownership handoff */
	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = task;
		key.ulk_addr = addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	/* a wake with no preexisting ull means there are no waiters */
	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * This current owner already knows the lock is contended
			 * and will redrive wakes, just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED,
		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		/* take over the old owner's +1 ref for deallocation below */
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Need to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	return ret;
}
1079
/*
 * Debugger (kdp) callback: report the owner and lock address for a
 * thread found blocked on a ulock turnstile event.
 */
void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	/* validate that the event really points at a ull_t from our zone */
	zone_require(ull_zone->kt_zv.zv_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner = thread_tid(ull->ull_owner);
		/* NOTE(review): for shared (ULK_XPROC) keys, ulk_addr aliases
		 * ulk_object through the union — confirm reporting the object
		 * id as "context" here is intentional. */
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		/* compare-and-wait ulocks never track an owner */
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}
1106