xref: /xnu-8792.61.2/bsd/kern/sys_ulock.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <machine/atomic.h>
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/ioctl.h>
34 #include <sys/file_internal.h>
35 #include <sys/proc_internal.h>
36 #include <sys/kernel.h>
37 #include <sys/guarded.h>
38 #include <sys/stat.h>
39 #include <sys/malloc.h>
40 #include <sys/sysproto.h>
41 #include <sys/pthread_shims.h>
42 
43 #include <mach/mach_types.h>
44 
45 #include <kern/cpu_data.h>
46 #include <kern/mach_param.h>
47 #include <kern/kern_types.h>
48 #include <kern/assert.h>
49 #include <kern/zalloc.h>
50 #include <kern/thread.h>
51 #include <kern/clock.h>
52 #include <kern/ledger.h>
53 #include <kern/policy_internal.h>
54 #include <kern/task.h>
55 #include <kern/telemetry.h>
56 #include <kern/waitq.h>
57 #include <kern/sched_prim.h>
58 #include <kern/turnstile.h>
59 #include <kern/zalloc.h>
60 #include <kern/debug.h>
61 
62 #include <pexpert/pexpert.h>
63 
64 #define XNU_TEST_BITMAP
65 #include <kern/bits.h>
66 
67 #include <os/hash.h>
68 #include <sys/ulock.h>
69 
70 /*
71  * How ulock promotion works:
72  *
73  * There’s a requested policy field on every thread called ‘promotions’, which
74  * expresses which ulock promotions are happening to this thread.
75  * The promotion priority saturates until the promotion count goes to 0.
76  *
77  * We also track effective promotion qos, which is the qos before clamping.
78  * This value is used for promoting a thread that another thread is waiting on,
79  * so that the lock owner reinflates to the right priority after unclamping.
80  *
81  * This also works for non-QoS threads, which can donate base priority to QoS
82  * and non-QoS threads alike.
83  *
84  * ulock wait applies a promotion to the owner communicated through
85  * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
86  * there is still an owner.  In ulock wake, if the waker is still the owner,
87  * then it clears its ownership and drops the boost.  It does NOT transfer
88  * ownership/priority boost to the new thread.  Instead, it selects the
89  * waiting thread with the highest base priority to be woken next, and
90  * relies on that thread to carry the torch for the other waiting threads.
91  */
92 
/* Lock group to which every ulock-related spinlock is attributed. */
static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");

typedef lck_spin_t ull_lock_t;
/* Per-ull interlock: protects all mutable fields of a ull_t. */
#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
/* NOTE: "notwned" is the historical (misspelled) name; callers use it as-is. */
#define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)

/* A ull_t address doubles as the wait event for its turnstile waitq. */
#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)
105 
/*
 * Kind of key identifying a ulock:
 *  - ULK_UADDR: per-process lock, keyed by (task, user address)
 *  - ULK_XPROC: shared-memory lock, keyed by (VM object id, offset)
 */
typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

/*
 * Hash key for a ulock.  The union members are packed because the key
 * bytes are fed directly to the hash (see ULL_INDEX / ull_hash_index),
 * so there must be no uninitialized padding inside the hashed span.
 */
typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t     ulk_addr;
			/*
			 * We use the task address as a hashing key,
			 * so that ulock wakes across exec can't
			 * be confused.
			 */
			task_t          ulk_task __kernel_data_semantics;
		};
		struct __attribute__((packed)) {
			uint64_t        ulk_object;
			uint64_t        ulk_offset;
		};
	};
	ulk_type        ulk_key_type;
} ulk_t;

/* Number of key bytes to hash for each key type. */
#define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(task_t))
#define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))
133 
134 inline static bool
ull_key_match(ulk_t * a,ulk_t * b)135 ull_key_match(ulk_t *a, ulk_t *b)
136 {
137 	if (a->ulk_key_type != b->ulk_key_type) {
138 		return false;
139 	}
140 
141 	if (a->ulk_key_type == ULK_UADDR) {
142 		return (a->ulk_task == b->ulk_task) &&
143 		       (a->ulk_addr == b->ulk_addr);
144 	}
145 
146 	assert(a->ulk_key_type == ULK_XPROC);
147 	return (a->ulk_object == b->ulk_object) &&
148 	       (a->ulk_offset == b->ulk_offset);
149 }
150 
/*
 * Kernel-side state for one contended ulock, hashed by its ulk_t key.
 * Lifetime is refcounted: one ref per ull_get() caller, freed when the
 * count drops to zero in ull_put().
 */
typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t        ull_owner; /* holds +1 thread reference */
	ulk_t           ull_key;
	ull_lock_t      ull_lock;       /* protects the fields below */
	uint            ull_bucket_index; /* cached hash bucket, for unhash on free */
	int32_t         ull_nwaiters;
	int32_t         ull_refcount;
	uint8_t         ull_opcode;     /* first opcode used; later ops must match (EDOM) */
	struct turnstile *ull_turnstile;
	queue_chain_t   ull_hash_link;
} ull_t;

/* ull_get() flag: fail with NULL rather than allocate (used by wake paths). */
#define ULL_MUST_EXIST  0x0001
static void ull_put(ull_t *);

/* Max time to adaptively spin on an on-core owner before blocking (tunable). */
static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
174 
#if DEVELOPMENT || DEBUG
/*
 * When set to a pid (or 1 for all processes), occasionally injects a
 * simulated copyin fault in sys_ulock_wait2 (see UL_DEBUG_SIMULATE_COPYIN_FAULT).
 */
static int ull_simulate_copyin_fault = 0;

/* Debug-only: kprintf the full contents of one ull_t. */
static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif
204 
/* One hash bucket: a queue of ull_t protected by a per-bucket spinlock. */
typedef struct ull_bucket {
	queue_head_t ulb_head;
	lck_spin_t   ulb_lock;
} ull_bucket_t;

/* Bucket count (power of 2) and table, fixed at boot by ulock_initialize(). */
static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
/* Debug counter of total ull_t allocations; reset by ull_hash_dump(TASK_NULL). */
static uint32_t ull_nzalloc = 0;
static ZONE_DEFINE_TYPE(ull_zone, "ulocks", ull_t, ZC_CACHING);

/* Bucket lock ordering: always taken before a ull's own interlock
 * when searching (ull_get), and alone when unhashing (ull_put). */
#define ull_bucket_lock(i)       lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i)     lck_spin_unlock(&ull_bucket[i].ulb_lock)
217 
218 static __inline__ uint32_t
ull_hash_index(const void * key,size_t length)219 ull_hash_index(const void *key, size_t length)
220 {
221 	uint32_t hash = os_hash_jenkins(key, length);
222 
223 	hash &= (ull_hash_buckets - 1);
224 
225 	return hash;
226 }
227 
/* Bucket index for a key, hashing only the bytes meaningful for its type. */
#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
229 
/*
 * Boot-time initialization: size and allocate the ull hash table.
 * Runs at EARLY_BOOT rank, before any ulock syscall can be issued.
 */
static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	/* Permanent allocation: the table is never freed. */
	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
252 
#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given task address.
 * if task==0, dump the whole table.
 * Returns the number of entries dumped/counted.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				/* XPROC entries have no owning task; only match ULK_UADDR. */
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;       /* full dump resets the allocation counter */
	}
	return count;
}
#endif
289 
290 static ull_t *
ull_alloc(ulk_t * key)291 ull_alloc(ulk_t *key)
292 {
293 	ull_t *ull = (ull_t *)zalloc(ull_zone);
294 	assert(ull != NULL);
295 
296 	ull->ull_refcount = 1;
297 	ull->ull_key = *key;
298 	ull->ull_bucket_index = ULL_INDEX(key);
299 	ull->ull_nwaiters = 0;
300 	ull->ull_opcode = 0;
301 
302 	ull->ull_owner = THREAD_NULL;
303 	ull->ull_turnstile = TURNSTILE_NULL;
304 
305 	ull_lock_init(ull);
306 
307 	ull_nzalloc++;
308 	return ull;
309 }
310 
311 static void
ull_free(ull_t * ull)312 ull_free(ull_t *ull)
313 {
314 	assert(ull->ull_owner == THREAD_NULL);
315 	assert(ull->ull_turnstile == TURNSTILE_NULL);
316 
317 	ull_assert_notwned(ull);
318 
319 	ull_lock_destroy(ull);
320 
321 	zfree(ull_zone, ull);
322 }
323 
/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked,
 * holding a +1 reference for the caller (dropped via ull_put).
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	/* Speculatively allocate before taking the bucket spinlock; if an
	 * existing entry is found, the allocation is handed back through
	 * *unused_ull for the caller to free outside the locks. */
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		/* Lock ordering: bucket lock, then per-ull interlock. */
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		/* Found an existing entry: return the unused allocation. */
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}
377 
/*
 * Drop one reference on the ull; on the last reference, unhash and free it.
 * Must be called with ull_lock held; the interlock is always dropped
 * before returning.
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	/* The key must have been invalidated (see ulock_wait_cleanup) before
	 * the final reference can go away. */
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	/* Last reference: remove from the hash under the bucket lock only
	 * (nobody else can find this ull anymore since its key is invalid). */
	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}
399 
400 extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
401 extern vm_map_t current_map(void);
402 extern boolean_t machine_thread_on_core(thread_t thread);
403 
404 static int
uaddr_findobj(user_addr_t uaddr,uint64_t * objectp,uint64_t * offsetp)405 uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
406 {
407 	kern_return_t ret;
408 	vm_page_info_basic_data_t info;
409 	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
410 	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
411 	if (ret != KERN_SUCCESS) {
412 		return EINVAL;
413 	}
414 
415 	if (objectp != NULL) {
416 		*objectp = (uint64_t)info.object_id;
417 	}
418 	if (offsetp != NULL) {
419 		*offsetp = (uint64_t)info.offset;
420 	}
421 
422 	return 0;
423 }
424 
425 static void ulock_wait_continue(void *, wait_result_t);
426 static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
427 
428 inline static int
wait_result_to_return_code(wait_result_t wr)429 wait_result_to_return_code(wait_result_t wr)
430 {
431 	int ret = 0;
432 
433 	switch (wr) {
434 	case THREAD_AWAKENED:
435 		break;
436 	case THREAD_TIMED_OUT:
437 		ret = ETIMEDOUT;
438 		break;
439 	case THREAD_INTERRUPTED:
440 	case THREAD_RESTART:
441 	default:
442 		ret = EINTR;
443 		break;
444 	}
445 
446 	return ret;
447 }
448 
449 static int
ulock_resolve_owner(uint32_t value,thread_t * owner)450 ulock_resolve_owner(uint32_t value, thread_t *owner)
451 {
452 	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);
453 
454 	*owner = port_name_to_thread(owner_name,
455 	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
456 	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
457 	if (*owner == THREAD_NULL) {
458 		/*
459 		 * Translation failed - even though the lock value is up to date,
460 		 * whatever was stored in the lock wasn't actually a thread port.
461 		 */
462 		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
463 	}
464 	return 0;
465 }
466 
467 int
sys_ulock_wait(struct proc * p,struct ulock_wait_args * args,int32_t * retval)468 sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
469 {
470 	struct ulock_wait2_args args2;
471 
472 	args2.operation = args->operation;
473 	args2.addr      = args->addr;
474 	args2.value     = args->value;
475 	args2.timeout   = (uint64_t)(args->timeout) * NSEC_PER_USEC;
476 	args2.value2    = 0;
477 
478 	return sys_ulock_wait2(p, &args2, retval);
479 }
480 
/*
 * ulock_wait2 syscall: block the calling thread until the ulock at
 * args->addr is woken, as long as the userspace value still equals
 * args->value at the time we commit to waiting.
 *
 * For UL_UNFAIR_LOCK the owner encoded in the lock value receives a
 * turnstile priority push from the waiters.  On success *retval is the
 * number of waiters remaining (>= 0); errors are returned as errno, or
 * negated into *retval when ULF_NO_ERRNO is set.
 */
int
sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread   = THREAD_NULL;
	thread_t old_owner      = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	/* Decode the opcode into lock width / sharing / ownership semantics. */
	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	/* The lock word must be non-NULL and naturally aligned for its width. */
	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = proc_task(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once,
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			/* Monitor the lock word; returns nonzero once it changed. */
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	/* All waiters/wakers on one ulock must agree on the opcode. */
	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *      i) holding the user lock or
		 *      ii) has just unlocked the user lock after I looked
		 *              and is heading toward the kernel to call ull_wake.
		 *              If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	if (timeout) {
		nanoseconds_to_deadline(timeout, &deadline);
	}

	/* Push our priority onto the owner through the turnstile. */
	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		/* Stash state for ulock_wait_continue, which runs after the block. */
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		/* Hand the remainder of our quantum to the owner when we know it. */
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
774 
/*
 * Common unwind path for ulock_wait: decrement the waiter count,
 * publish it through *retval, drop the ull reference, and release
 * all thread references accumulated during the wait.
 * Must be called with ull_lock held; always drops it (via ull_put).
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		/* Invalidate the key so ull_put may release the last ref. */
		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Need to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}
818 
/*
 * Continuation run after the thread blocked in sys_ulock_wait2 is woken.
 * Recovers the state stashed in uu_save, completes the turnstile wait,
 * runs the common cleanup, and returns to userspace (never returns here).
 */
__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	uthread_t uthread = current_uthread();
	int ret = 0;

	/* State saved by sys_ulock_wait2 just before blocking. */
	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	/* Mirror the ULF_NO_ERRNO munging done on the non-continuation path. */
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}
846 
/*
 * ulock_wake syscall entry point: dispatches debug opcodes on
 * DEVELOPMENT/DEBUG kernels, then forwards to ulock_wake() for the
 * current task, applying ULF_NO_ERRNO munging to the result.
 */
int
sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
{
	int ret = 0;
#if DEVELOPMENT || DEBUG
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);

	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_task(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(TASK_NULL);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif
	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);

	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	return ret;
}
874 
/*
 * Wake waiter(s) on the ulock identified by (task, operation, addr).
 *
 * Flags select the wake mode: ULF_WAKE_THREAD wakes one specific thread
 * (named by wake_value), ULF_WAKE_ALL wakes everyone, and the default
 * wakes the single highest-priority waiter.  For UL_UNFAIR_LOCK the
 * turnstile inheritor is updated so the boost transfers correctly, and
 * only the known owner may wake (unless ULF_WAKE_ALLOW_NON_OWNER).
 * Returns an errno (never munged here; callers handle ULF_NO_ERRNO).
 */
int
ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
{
	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
	uint flags = operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread    = THREAD_NULL;

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	/* Waking a specific thread is incompatible with wake-all and ownership. */
	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	/* Build the same key form the waiters used (see sys_ulock_wait2). */
	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = task;
		key.ulk_addr = addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	/* ULL_MUST_EXIST: if nobody ever waited, there is nothing to wake. */
	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * This current owner already knows the lock is contended
			 * and will redrive wakes, just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			/* The named thread wasn't waiting on this ulock. */
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED,
		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		/* Swap the stashed owner ref: old one is released below. */
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Need to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	return ret;
}
1048 
/*
 * Debugger (kdp) callback: report the owner and user address of the
 * ulock a thread is blocked on, for stackshot/waitinfo.  Runs in kdp
 * context, so it must not take locks; zone_require validates that the
 * event really points into the ull zone.
 */
void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner   = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		/* compare-and-wait has no ownership concept */
		waitinfo->owner   = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}
1075