/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner.  In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost.  It does NOT transfer
 * ownership/priority boost to the new thread.  Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
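
/*
 * Editor's sketch (illustrative, not part of xnu): the flow described above
 * for UL_UNFAIR_LOCK, with waiter W and owner O. The names below are real
 * symbols from this file; the interleaving is a hypothetical example.
 *
 *   W: ulock_wait2()
 *      resolves O from the lock value via ulock_resolve_owner()
 *      turnstile_update_inheritor(ts, O)   <- O's priority saturates upward
 *      blocks on the turnstile waitq
 *   O: ulock_wake()
 *      waitq_wakeup64_identify()           <- picks the highest-pri waiter
 *      drops its own boost; the woken waiter re-applies the promotion for
 *      any remaining waiters the next time it calls ulock_wait2()
 */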

static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");

typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)

#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t     ulk_addr;
			pid_t           ulk_pid;
		};
		struct __attribute__((packed)) {
			uint64_t        ulk_object;
			uint64_t        ulk_offset;
		};
	};
	ulk_type        ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(pid_t))
#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_key_type != b->ulk_key_type) {
		return false;
	}

	if (a->ulk_key_type == ULK_UADDR) {
		return (a->ulk_pid == b->ulk_pid) &&
		       (a->ulk_addr == b->ulk_addr);
	}

	assert(a->ulk_key_type == ULK_XPROC);
	return (a->ulk_object == b->ulk_object) &&
	       (a->ulk_offset == b->ulk_offset);
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock,
	 * i.e. it may be out of date with respect to the real value in userspace.
	 */
	thread_t        ull_owner; /* holds +1 thread reference */
	ulk_t           ull_key;
	ull_lock_t      ull_lock;
	uint            ull_bucket_index;
	int32_t         ull_nwaiters;
	int32_t         ull_refcount;
	uint8_t         ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t   ull_hash_link;
} ull_t;

#define ULL_MUST_EXIST  0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
	lck_spin_t   ulb_lock;
} ull_bucket_t;

static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
static uint32_t ull_nzalloc = 0;
static ZONE_DECLARE(ull_zone, "ulocks", sizeof(ull_t), ZC_CACHING);

#define ull_bucket_lock(i)       lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i)     lck_spin_unlock(&ull_bucket[i].ulb_lock)

static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
	uint32_t hash = os_hash_jenkins(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
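
/*
 * Editor's note (illustrative, not part of xnu): only the key bytes that are
 * meaningful for the key type are hashed, which is why the union members are
 * packed and why ULK_UADDR_LEN/ULK_XPROC_LEN exist. A hypothetical lookup key
 * for a per-process ulock would be built and hashed like this:
 *
 *   ulk_t key;
 *   key.ulk_key_type = ULK_UADDR;
 *   key.ulk_pid      = proc_getpid(p);
 *   key.ulk_addr     = args->addr;
 *   uint32_t i = ULL_INDEX(&key);  // jenkins hash over (addr, pid),
 *                                  // masked to a power-of-two bucket count
 *
 * Collisions within a bucket are disambiguated by ull_key_match() at lookup.
 */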

static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4.
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
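
/*
 * Editor's worked example (not part of xnu), assuming bit_ceiling(n) returns
 * ceil(log2(n)) as the sizing comment above implies: with thread_max = 5000,
 * the next power of two is 8192 = 1 << 13, so
 * ull_hash_buckets = 1 << (13 - 2) = 2048, which satisfies the assert that
 * 2048 >= 5000 / 4 = 1250.
 */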

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * If pid==0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
	int count = 0;
	if (pid == 0) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (pid == 0) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (pid == 0) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc(ull_zone);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notowned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If the ULL_MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}
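
/*
 * Editor's sketch of the ull_get()/ull_put() caller protocol (illustrative,
 * not part of xnu); `key` stands for an already-populated ulk_t:
 *
 *   ull_t *unused_ull = NULL;
 *   ull_t *ull = ull_get(&key, 0, &unused_ull);
 *   if (ull != NULL) {
 *           // ull_lock is held and a +1 refcount was taken
 *           ...
 *           ull_put(ull);  // drops the ref, unlocks, frees on last ref
 *   }
 *   if (unused_ull != NULL) {
 *           // preallocated ull_t that lost the race to an existing entry
 *           ull_free(unused_ull);
 *   }
 */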

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}
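
/*
 * Editor's note (illustrative, not part of xnu): uaddr_findobj() is what makes
 * the *_SHARED opcodes work across processes. Two processes that map the same
 * file or shared-memory object at different virtual addresses still resolve to
 * the same (object_id, offset) pair, so they build equal ULK_XPROC keys and
 * rendezvous on the same ull_t:
 *
 *   uint64_t object, offset;
 *   if (uaddr_findobj(args->addr, &object, &offset) == 0) {
 *           key.ulk_key_type = ULK_XPROC;
 *           key.ulk_object   = object;
 *           key.ulk_offset   = offset;
 *   }
 */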

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}
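
/*
 * Editor's note (illustrative, not part of xnu): for UL_UNFAIR_LOCK the 32-bit
 * lock word carries the owner thread's mach port name; the exact bit layout is
 * owned by ulock_owner_value_to_port_name() in sys/ulock.h. A hypothetical
 * wait-side use, mirroring what ulock_wait2() does below:
 *
 *   thread_t owner = THREAD_NULL;
 *   int err = ulock_resolve_owner(lock_value, &owner);
 *   if (err == 0) {
 *           // owner now holds a +1 reference;
 *           // release it later with thread_deallocate(owner)
 *   }
 */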

int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	struct ulock_wait2_args args2;

	args2.operation = args->operation;
	args2.addr      = args->addr;
	args2.value     = args->value;
	args2.timeout   = (uint64_t)(args->timeout) * NSEC_PER_USEC;
	args2.value2    = 0;

	return ulock_wait2(p, &args2, retval);
}

int
ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread   = THREAD_NULL;
	thread_t old_owner      = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_pid = proc_getpid(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once.
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *      i) holding the user lock or
		 *      ii) has just unlocked the user lock after I looked
		 *              and is heading toward the kernel to call ull_wake.
		 *              If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	if (timeout) {
		nanoseconds_to_deadline(timeout, &deadline);
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
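
/*
 * Editor's sketch (illustrative, not part of xnu): how a userspace unfair lock
 * typically drives this syscall pair. __ulock_wait() and __ulock_wake() are
 * private libsystem_kernel wrappers; treating their prototypes as
 * (operation, addr, value, timeout_us) and (operation, addr, wake_value) is an
 * assumption here.
 *
 *   // contended acquire: sleep while the word still holds the owner's name
 *   int rc = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO,
 *                         &lock_word, owner_name, 0);
 *   // with ULF_NO_ERRNO: rc >= 0 is the remaining-waiter count from
 *   // ulock_wait_cleanup(); rc < 0 is a negated errno
 *
 *   // contended release: clear the word, then wake one waiter; the kernel
 *   // picks the highest-priority waiter and makes it the turnstile inheritor
 *   __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock_word, 0);
 */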

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	thread_t self = current_thread();
	uthread_t uthread = (uthread_t)get_bsdthread_info(self);
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}

int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread    = THREAD_NULL;

#if DEVELOPMENT || DEBUG
	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_getpid(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(0);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (args->addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_pid = proc_getpid(p);
		key.ulk_addr = args->addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * The current owner already knows the lock is contended
			 * and will redrive wakes, so just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		if (set_owner) {
			turnstile_update_inheritor(ts, THREAD_NULL,
			    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		}
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, 0);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Needs to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
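
/*
 * Editor's note (illustrative, not part of xnu): wake-flag combinations as
 * enforced above, shown with the hypothetical userspace wrappers from the
 * sketch after ulock_wait2():
 *
 *   valid:   __ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_THREAD, &w, port);
 *   valid:   __ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL,    &w, 0);
 *   invalid: __ulock_wake(UL_UNFAIR_LOCK      | ULF_WAKE_THREAD, &l, port);
 *
 * The last case returns EINVAL because UL_UNFAIR_LOCK tracks an owner: a
 * directed wake could hand the turnstile inheritance to the wrong thread, so
 * owner-tracking wakes must let the priority-ordered waitq pick the waiter.
 */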

void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner   = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		waitinfo->owner   = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}
1062