/* xref: /xnu-10063.121.3/bsd/kern/sys_ulock.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa) */
/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28 
#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>
69 
/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner.  In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost.  It does NOT transfer
 * ownership/priority boost to the new thread.  Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
92 
93 static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");
94 
95 
96 #if ULL_TICKET_LOCK
97 typedef lck_ticket_t ull_lock_t;
98 #define ull_lock_init(ull)      lck_ticket_init(&ull->ull_lock, &ull_lck_grp)
99 #define ull_lock_destroy(ull)   lck_ticket_destroy(&ull->ull_lock, &ull_lck_grp)
100 #define ull_lock(ull)           lck_ticket_lock(&ull->ull_lock, &ull_lck_grp)
101 #define ull_unlock(ull)         lck_ticket_unlock(&ull->ull_lock)
102 #define ull_assert_owned(ull)   lck_ticket_assert_owned(&ull->ull_lock)
103 #define ull_assert_notwned(ull) lck_ticket_assert_not_owned(&ull->ull_lock)
104 #else
105 typedef lck_spin_t ull_lock_t;
106 #define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
107 #define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
108 #define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
109 #define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
110 #define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
111 #define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
112 #endif /* ULL_TICKET_LOCK */
113 
114 #define ULOCK_TO_EVENT(ull)   ((event_t)ull)
115 #define EVENT_TO_ULOCK(event) ((ull_t *)event)
116 
117 typedef enum {
118 	ULK_INVALID = 0,
119 	ULK_UADDR,
120 	ULK_XPROC,
121 } ulk_type;
122 
123 typedef struct {
124 	union {
125 		struct __attribute__((packed)) {
126 			user_addr_t     ulk_addr;
127 			/*
128 			 * We use the task address as a hashing key,
129 			 * so that ulock wakes across exec can't
130 			 * be confused.
131 			 */
132 			task_t          ulk_task __kernel_data_semantics;
133 		};
134 		struct __attribute__((packed)) {
135 			uint64_t        ulk_object;
136 			uint64_t        ulk_offset;
137 		};
138 	};
139 	ulk_type        ulk_key_type;
140 } ulk_t;
141 
142 #define ULK_UADDR_LEN   (sizeof(user_addr_t) + sizeof(task_t))
143 #define ULK_XPROC_LEN   (sizeof(uint64_t) + sizeof(uint64_t))
144 
145 inline static bool
ull_key_match(ulk_t * a,ulk_t * b)146 ull_key_match(ulk_t *a, ulk_t *b)
147 {
148 	if (a->ulk_key_type != b->ulk_key_type) {
149 		return false;
150 	}
151 
152 	if (a->ulk_key_type == ULK_UADDR) {
153 		return (a->ulk_task == b->ulk_task) &&
154 		       (a->ulk_addr == b->ulk_addr);
155 	}
156 
157 	assert(a->ulk_key_type == ULK_XPROC);
158 	return (a->ulk_object == b->ulk_object) &&
159 	       (a->ulk_offset == b->ulk_offset);
160 }
161 
162 typedef struct ull {
163 	/*
164 	 * ull_owner is the most recent known value for the owner of this ulock
165 	 * i.e. it may be out of date WRT the real value in userspace.
166 	 */
167 	thread_t        ull_owner; /* holds +1 thread reference */
168 	ulk_t           ull_key;
169 	ull_lock_t      ull_lock;
170 	uint            ull_bucket_index;
171 	int32_t         ull_nwaiters;
172 	int32_t         ull_refcount;
173 	uint8_t         ull_opcode;
174 	struct turnstile *ull_turnstile;
175 	queue_chain_t   ull_hash_link;
176 } ull_t;
177 
178 #define ULL_MUST_EXIST  0x0001
179 static void ull_put(ull_t *);
180 
181 static uint32_t ulock_adaptive_spin_usecs = 20;
182 
183 SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
184     &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
185 
#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

/* Dump one ull_t's fields via kprintf (debug builds only). */
static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif
215 
216 typedef struct ull_bucket {
217 	queue_head_t ulb_head;
218 #if ULL_TICKET_LOCK
219 	lck_ticket_t ulb_lock;
220 #else
221 	lck_spin_t   ulb_lock;
222 #endif /* ULL_TICKET_LOCK */
223 } ull_bucket_t;
224 
225 static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
226 static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
227 static uint32_t ull_nzalloc = 0;
228 static KALLOC_TYPE_DEFINE(ull_zone, ull_t, KT_DEFAULT);
229 
230 #if ULL_TICKET_LOCK
231 #define ull_bucket_lock(i)       lck_ticket_lock(&ull_bucket[i].ulb_lock, &ull_lck_grp)
232 #define ull_bucket_unlock(i)     lck_ticket_unlock(&ull_bucket[i].ulb_lock)
233 #else
234 #define ull_bucket_lock(i)       lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
235 #define ull_bucket_unlock(i)     lck_spin_unlock(&ull_bucket[i].ulb_lock)
236 #endif /* ULL_TICKET_LOCK */
237 static __inline__ uint32_t
ull_hash_index(const void * key,size_t length)238 ull_hash_index(const void *key, size_t length)
239 {
240 	uint32_t hash = os_hash_jenkins(key, length);
241 
242 	hash &= (ull_hash_buckets - 1);
243 
244 	return hash;
245 }
246 
247 #define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
248 
249 static void
ulock_initialize(void)250 ulock_initialize(void)
251 {
252 	assert(thread_max > 16);
253 	/* Size ull_hash_buckets based on thread_max.
254 	 * Round up to nearest power of 2, then divide by 4
255 	 */
256 	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
257 
258 	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
259 	assert(ull_hash_buckets >= thread_max / 4);
260 
261 	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
262 	    ZALIGN_PTR);
263 	assert(ull_bucket != NULL);
264 
265 	for (int i = 0; i < ull_hash_buckets; i++) {
266 		queue_init(&ull_bucket[i].ulb_head);
267 #if ULL_TICKET_LOCK
268 		lck_ticket_init(&ull_bucket[i].ulb_lock, &ull_lck_grp);
269 #else
270 		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
271 #endif /* ULL_TICKET_LOCK */
272 	}
273 }
274 STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);
275 
#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given task address.
 * if task==0, dump the whole table.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif
312 
313 static ull_t *
ull_alloc(ulk_t * key)314 ull_alloc(ulk_t *key)
315 {
316 	ull_t *ull = (ull_t *)zalloc_flags(ull_zone, Z_SET_NOTSHARED);
317 	assert(ull != NULL);
318 
319 	ull->ull_refcount = 1;
320 	ull->ull_key = *key;
321 	ull->ull_bucket_index = ULL_INDEX(key);
322 	ull->ull_nwaiters = 0;
323 	ull->ull_opcode = 0;
324 
325 	ull->ull_owner = THREAD_NULL;
326 	ull->ull_turnstile = TURNSTILE_NULL;
327 
328 	ull_lock_init(ull);
329 
330 	ull_nzalloc++;
331 	return ull;
332 }
333 
334 static void
ull_free(ull_t * ull)335 ull_free(ull_t *ull)
336 {
337 	assert(ull->ull_owner == THREAD_NULL);
338 	assert(ull->ull_turnstile == TURNSTILE_NULL);
339 
340 	ull_assert_notwned(ull);
341 
342 	ull_lock_destroy(ull);
343 
344 	zfree(ull_zone, ull);
345 }
346 
347 /* Finds an existing ulock structure (ull_t), or creates a new one.
348  * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
349  * The ulock structure is returned with ull_lock locked
350  */
351 static ull_t *
ull_get(ulk_t * key,uint32_t flags,ull_t ** unused_ull)352 ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
353 {
354 	ull_t *ull = NULL;
355 	uint i = ULL_INDEX(key);
356 	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
357 	ull_t *elem;
358 
359 	ull_bucket_lock(i);
360 	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
361 		ull_lock(elem);
362 		if (ull_key_match(&elem->ull_key, key)) {
363 			ull = elem;
364 			break;
365 		} else {
366 			ull_unlock(elem);
367 		}
368 	}
369 	if (ull == NULL) {
370 		if (flags & ULL_MUST_EXIST) {
371 			/* Must already exist (called from wake) */
372 			ull_bucket_unlock(i);
373 			assert(new_ull == NULL);
374 			assert(unused_ull == NULL);
375 			return NULL;
376 		}
377 
378 		if (new_ull == NULL) {
379 			/* Alloc above failed */
380 			ull_bucket_unlock(i);
381 			return NULL;
382 		}
383 
384 		ull = new_ull;
385 		ull_lock(ull);
386 		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
387 	} else if (!(flags & ULL_MUST_EXIST)) {
388 		assert(new_ull);
389 		assert(unused_ull);
390 		assert(*unused_ull == NULL);
391 		*unused_ull = new_ull;
392 	}
393 
394 	ull->ull_refcount++;
395 
396 	ull_bucket_unlock(i);
397 
398 	return ull; /* still locked */
399 }
400 
401 /*
402  * Must be called with ull_lock held
403  */
404 static void
ull_put(ull_t * ull)405 ull_put(ull_t *ull)
406 {
407 	ull_assert_owned(ull);
408 	int refcount = --ull->ull_refcount;
409 	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
410 	ull_unlock(ull);
411 
412 	if (refcount > 0) {
413 		return;
414 	}
415 
416 	ull_bucket_lock(ull->ull_bucket_index);
417 	remqueue(&ull->ull_hash_link);
418 	ull_bucket_unlock(ull->ull_bucket_index);
419 
420 	ull_free(ull);
421 }
422 
423 extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
424 extern vm_map_t current_map(void);
425 extern boolean_t machine_thread_on_core(thread_t thread);
426 
427 static int
uaddr_findobj(user_addr_t uaddr,uint64_t * objectp,uint64_t * offsetp)428 uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
429 {
430 	kern_return_t ret;
431 	vm_page_info_basic_data_t info;
432 	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
433 	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
434 	if (ret != KERN_SUCCESS) {
435 		return EINVAL;
436 	}
437 
438 	if (objectp != NULL) {
439 		*objectp = (uint64_t)info.object_id;
440 	}
441 	if (offsetp != NULL) {
442 		*offsetp = (uint64_t)info.offset;
443 	}
444 
445 	return 0;
446 }
447 
448 static void ulock_wait_continue(void *, wait_result_t);
449 static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
450 
451 inline static int
wait_result_to_return_code(wait_result_t wr)452 wait_result_to_return_code(wait_result_t wr)
453 {
454 	int ret = 0;
455 
456 	switch (wr) {
457 	case THREAD_AWAKENED:
458 		break;
459 	case THREAD_TIMED_OUT:
460 		ret = ETIMEDOUT;
461 		break;
462 	case THREAD_INTERRUPTED:
463 	case THREAD_RESTART:
464 	default:
465 		ret = EINTR;
466 		break;
467 	}
468 
469 	return ret;
470 }
471 
472 static int
ulock_resolve_owner(uint32_t value,thread_t * owner)473 ulock_resolve_owner(uint32_t value, thread_t *owner)
474 {
475 	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);
476 
477 	*owner = port_name_to_thread(owner_name,
478 	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
479 	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
480 	if (*owner == THREAD_NULL) {
481 		/*
482 		 * Translation failed - even though the lock value is up to date,
483 		 * whatever was stored in the lock wasn't actually a thread port.
484 		 */
485 		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
486 	}
487 	return 0;
488 }
489 
490 int
sys_ulock_wait(struct proc * p,struct ulock_wait_args * args,int32_t * retval)491 sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
492 {
493 	struct ulock_wait2_args args2;
494 
495 	args2.operation = args->operation;
496 	args2.addr      = args->addr;
497 	args2.value     = args->value;
498 	args2.timeout   = (uint64_t)(args->timeout) * NSEC_PER_USEC;
499 	args2.value2    = 0;
500 
501 	return sys_ulock_wait2(p, &args2, retval);
502 }
503 
504 int
sys_ulock_wait2(struct proc * p,struct ulock_wait2_args * args,int32_t * retval)505 sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
506 {
507 	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
508 	uint flags = args->operation & UL_FLAGS_MASK;
509 
510 	if (flags & ULF_WAIT_CANCEL_POINT) {
511 		__pthread_testcancel(1);
512 	}
513 
514 	int ret = 0;
515 	thread_t self = current_thread();
516 	ulk_t key;
517 
518 	/* involved threads - each variable holds +1 ref if not null */
519 	thread_t owner_thread   = THREAD_NULL;
520 	thread_t old_owner      = THREAD_NULL;
521 
522 	ull_t *unused_ull = NULL;
523 
524 	if ((flags & ULF_WAIT_MASK) != flags) {
525 		ret = EINVAL;
526 		goto munge_retval;
527 	}
528 
529 	bool set_owner = false;
530 	bool xproc = false;
531 	size_t lock_size = sizeof(uint32_t);
532 	int copy_ret;
533 
534 	switch (opcode) {
535 	case UL_UNFAIR_LOCK:
536 		set_owner = true;
537 		break;
538 	case UL_COMPARE_AND_WAIT:
539 		break;
540 	case UL_COMPARE_AND_WAIT64:
541 		lock_size = sizeof(uint64_t);
542 		break;
543 	case UL_COMPARE_AND_WAIT_SHARED:
544 		xproc = true;
545 		break;
546 	case UL_COMPARE_AND_WAIT64_SHARED:
547 		xproc = true;
548 		lock_size = sizeof(uint64_t);
549 		break;
550 	default:
551 		ret = EINVAL;
552 		goto munge_retval;
553 	}
554 
555 	uint64_t value = 0;
556 
557 	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
558 		ret = EINVAL;
559 		goto munge_retval;
560 	}
561 
562 	if (xproc) {
563 		uint64_t object = 0;
564 		uint64_t offset = 0;
565 
566 		ret = uaddr_findobj(args->addr, &object, &offset);
567 		if (ret) {
568 			ret = EINVAL;
569 			goto munge_retval;
570 		}
571 		key.ulk_key_type = ULK_XPROC;
572 		key.ulk_object = object;
573 		key.ulk_offset = offset;
574 	} else {
575 		key.ulk_key_type = ULK_UADDR;
576 		key.ulk_task = proc_task(p);
577 		key.ulk_addr = args->addr;
578 	}
579 
580 	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
581 		/*
582 		 * Attempt the copyin outside of the lock once,
583 		 *
584 		 * If it doesn't match (which is common), return right away.
585 		 *
586 		 * If it matches, resolve the current owner, and if it is on core,
587 		 * spin a bit waiting for the value to change. If the owner isn't on
588 		 * core, or if the value stays stable, then go on with the regular
589 		 * blocking code.
590 		 */
591 		uint64_t end = 0;
592 		uint32_t u32;
593 
594 		ret = copyin_atomic32(args->addr, &u32);
595 		if (ret || u32 != args->value) {
596 			goto munge_retval;
597 		}
598 		for (;;) {
599 			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
600 				break;
601 			}
602 
603 			/* owner_thread may have a +1 starting here */
604 
605 			if (!machine_thread_on_core(owner_thread)) {
606 				break;
607 			}
608 			if (end == 0) {
609 				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
610 				    NSEC_PER_USEC, &end);
611 			} else if (mach_absolute_time() > end) {
612 				break;
613 			}
614 			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
615 				goto munge_retval;
616 			}
617 		}
618 	}
619 
620 	ull_t *ull = ull_get(&key, 0, &unused_ull);
621 	if (ull == NULL) {
622 		ret = ENOMEM;
623 		goto munge_retval;
624 	}
625 	/* ull is locked */
626 
627 	ull->ull_nwaiters++;
628 
629 	if (ull->ull_opcode == 0) {
630 		ull->ull_opcode = opcode;
631 	} else if (ull->ull_opcode != opcode) {
632 		ret = EDOM;
633 		goto out_locked;
634 	}
635 
636 	/*
637 	 * We don't want this copyin to get wedged behind VM operations,
638 	 * but we have to read the userspace value under the ull lock for correctness.
639 	 *
640 	 * Until <rdar://problem/24999882> exists,
641 	 * holding the ull spinlock across copyin forces any
642 	 * vm_fault we encounter to fail.
643 	 */
644 
645 	/* copyin_atomicXX always checks alignment */
646 
647 	if (lock_size == 4) {
648 		uint32_t u32;
649 		copy_ret = copyin_atomic32(args->addr, &u32);
650 		value = u32;
651 	} else {
652 		copy_ret = copyin_atomic64(args->addr, &value);
653 	}
654 
655 #if DEVELOPMENT || DEBUG
656 	/* Occasionally simulate copyin finding the user address paged out */
657 	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
658 		static _Atomic int fault_inject = 0;
659 		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
660 			copy_ret = EFAULT;
661 		}
662 	}
663 #endif
664 	if (copy_ret != 0) {
665 		/* copyin() will return an error if the access to the user addr would have faulted,
666 		 * so just return and let the user level code fault it in.
667 		 */
668 		ret = copy_ret;
669 		goto out_locked;
670 	}
671 
672 	if (value != args->value) {
673 		/* Lock value has changed from expected so bail out */
674 		goto out_locked;
675 	}
676 
677 	if (set_owner) {
678 		if (owner_thread == THREAD_NULL) {
679 			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
680 			if (ret == EOWNERDEAD) {
681 				/*
682 				 * Translation failed - even though the lock value is up to date,
683 				 * whatever was stored in the lock wasn't actually a thread port.
684 				 */
685 				goto out_locked;
686 			}
687 			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
688 			ret = 0;
689 		}
690 		/* owner_thread has a +1 reference */
691 
692 		/*
693 		 * At this point, I know:
694 		 * a) owner_thread is definitely the current owner, because I just read the value
695 		 * b) owner_thread is either:
696 		 *      i) holding the user lock or
697 		 *      ii) has just unlocked the user lock after I looked
698 		 *              and is heading toward the kernel to call ull_wake.
699 		 *              If so, it's going to have to wait for the ull mutex.
700 		 *
701 		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
702 		 * on it to come by later to issue the wakeup and lose its promotion.
703 		 */
704 
705 		/* Return the +1 ref from the ull_owner field */
706 		old_owner = ull->ull_owner;
707 		ull->ull_owner = THREAD_NULL;
708 
709 		if (owner_thread != THREAD_NULL) {
710 			/* The ull_owner field now owns a +1 ref on owner_thread */
711 			thread_reference(owner_thread);
712 			ull->ull_owner = owner_thread;
713 		}
714 	}
715 
716 	wait_result_t wr;
717 	uint64_t timeout = args->timeout; /* nanoseconds */
718 	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
719 	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
720 	struct turnstile *ts;
721 
722 	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
723 	    TURNSTILE_NULL, TURNSTILE_ULOCK);
724 	thread_set_pending_block_hint(self, kThreadWaitUserLock);
725 
726 	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
727 		interruptible |= THREAD_WAIT_NOREPORT;
728 	}
729 
730 	turnstile_update_inheritor(ts, owner_thread,
731 	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
732 
733 	if (timeout) {
734 		if (flags & ULF_DEADLINE) {
735 			deadline = timeout;
736 		} else {
737 			nanoseconds_to_deadline(timeout, &deadline);
738 		}
739 	}
740 
741 	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
742 	    interruptible, deadline);
743 
744 	if (wr == THREAD_WAITING) {
745 		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
746 		uthread->uu_save.uus_ulock_wait_data.ull = ull;
747 		uthread->uu_save.uus_ulock_wait_data.retval = retval;
748 		uthread->uu_save.uus_ulock_wait_data.flags = flags;
749 		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
750 		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
751 	}
752 
753 	ull_unlock(ull);
754 
755 	if (unused_ull) {
756 		ull_free(unused_ull);
757 		unused_ull = NULL;
758 	}
759 
760 	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
761 
762 	if (wr == THREAD_WAITING) {
763 		if (set_owner && owner_thread != THREAD_NULL) {
764 			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
765 		} else {
766 			assert(owner_thread == THREAD_NULL);
767 			thread_block_parameter(ulock_wait_continue, ull);
768 		}
769 		/* NOT REACHED */
770 	}
771 
772 	ret = wait_result_to_return_code(wr);
773 
774 	ull_lock(ull);
775 	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
776 
777 out_locked:
778 	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
779 	owner_thread = NULL;
780 
781 	if (unused_ull) {
782 		ull_free(unused_ull);
783 		unused_ull = NULL;
784 	}
785 
786 	assert(*retval >= 0);
787 
788 munge_retval:
789 	if (owner_thread) {
790 		thread_deallocate(owner_thread);
791 	}
792 	if (ret == ESTALE) {
793 		ret = 0;
794 	}
795 	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
796 		*retval = -ret;
797 		ret = 0;
798 	}
799 	return ret;
800 }
801 
802 /*
803  * Must be called with ull_lock held
804  */
805 static void
ulock_wait_cleanup(ull_t * ull,thread_t owner_thread,thread_t old_owner,int32_t * retval)806 ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
807 {
808 	ull_assert_owned(ull);
809 
810 	thread_t old_lingering_owner = THREAD_NULL;
811 
812 	*retval = --ull->ull_nwaiters;
813 	if (ull->ull_nwaiters == 0) {
814 		/*
815 		 * If the wait was canceled early, we might need to
816 		 * clear out the lingering owner reference before
817 		 * freeing the ull.
818 		 */
819 		old_lingering_owner = ull->ull_owner;
820 		ull->ull_owner = THREAD_NULL;
821 
822 		memset(&ull->ull_key, 0, sizeof ull->ull_key);
823 		ull->ull_refcount--;
824 		assert(ull->ull_refcount > 0);
825 	}
826 	ull_put(ull);
827 
828 	/* Need to be called after dropping the interlock */
829 	turnstile_cleanup();
830 
831 	if (owner_thread != THREAD_NULL) {
832 		thread_deallocate(owner_thread);
833 	}
834 
835 	if (old_owner != THREAD_NULL) {
836 		thread_deallocate(old_owner);
837 	}
838 
839 	if (old_lingering_owner != THREAD_NULL) {
840 		thread_deallocate(old_lingering_owner);
841 	}
842 
843 	assert(*retval >= 0);
844 }
845 
846 __attribute__((noreturn))
847 static void
ulock_wait_continue(__unused void * parameter,wait_result_t wr)848 ulock_wait_continue(__unused void * parameter, wait_result_t wr)
849 {
850 	uthread_t uthread = current_uthread();
851 	int ret = 0;
852 
853 	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
854 	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
855 	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
856 	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
857 	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;
858 
859 	ret = wait_result_to_return_code(wr);
860 
861 	ull_lock(ull);
862 	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
863 
864 	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
865 
866 	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
867 		*retval = -ret;
868 		ret = 0;
869 	}
870 
871 	unix_syscall_return(ret);
872 }
873 
874 int
sys_ulock_wake(struct proc * p,struct ulock_wake_args * args,int32_t * retval)875 sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
876 {
877 	int ret = 0;
878 #if DEVELOPMENT || DEBUG
879 	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
880 
881 	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
882 		*retval = ull_hash_dump(proc_task(p));
883 		return ret;
884 	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
885 		*retval = ull_hash_dump(TASK_NULL);
886 		return ret;
887 	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
888 		ull_simulate_copyin_fault = (int)(args->wake_value);
889 		return ret;
890 	}
891 #endif
892 	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);
893 
894 	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
895 		*retval = -ret;
896 		ret = 0;
897 	}
898 
899 	return ret;
900 }
901 
902 int
ulock_wake(task_t task,uint32_t operation,user_addr_t addr,uint64_t wake_value)903 ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
904 {
905 	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
906 	uint flags = operation & UL_FLAGS_MASK;
907 	int ret = 0;
908 	ulk_t key;
909 
910 	/* involved threads - each variable holds +1 ref if not null */
911 	thread_t wake_thread    = THREAD_NULL;
912 
913 	bool set_owner = false;
914 	bool allow_non_owner = false;
915 	bool xproc = false;
916 
917 	switch (opcode) {
918 	case UL_UNFAIR_LOCK:
919 		set_owner = true;
920 		break;
921 	case UL_COMPARE_AND_WAIT:
922 	case UL_COMPARE_AND_WAIT64:
923 		break;
924 	case UL_COMPARE_AND_WAIT_SHARED:
925 	case UL_COMPARE_AND_WAIT64_SHARED:
926 		xproc = true;
927 		break;
928 	default:
929 		ret = EINVAL;
930 		goto munge_retval;
931 	}
932 
933 	if ((flags & ULF_WAKE_MASK) != flags) {
934 		ret = EINVAL;
935 		goto munge_retval;
936 	}
937 
938 	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
939 		ret = EINVAL;
940 		goto munge_retval;
941 	}
942 
943 	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
944 		if (!set_owner) {
945 			ret = EINVAL;
946 			goto munge_retval;
947 		}
948 
949 		allow_non_owner = true;
950 	}
951 
952 	if (addr == 0) {
953 		ret = EINVAL;
954 		goto munge_retval;
955 	}
956 
957 	if (xproc) {
958 		uint64_t object = 0;
959 		uint64_t offset = 0;
960 
961 		ret = uaddr_findobj(addr, &object, &offset);
962 		if (ret) {
963 			ret = EINVAL;
964 			goto munge_retval;
965 		}
966 		key.ulk_key_type = ULK_XPROC;
967 		key.ulk_object = object;
968 		key.ulk_offset = offset;
969 	} else {
970 		key.ulk_key_type = ULK_UADDR;
971 		key.ulk_task = task;
972 		key.ulk_addr = addr;
973 	}
974 
975 	if (flags & ULF_WAKE_THREAD) {
976 		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
977 		wake_thread = port_name_to_thread(wake_thread_name,
978 		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
979 		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
980 		if (wake_thread == THREAD_NULL) {
981 			ret = ESRCH;
982 			goto munge_retval;
983 		}
984 	}
985 
986 	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
987 	thread_t new_owner = THREAD_NULL;
988 	struct turnstile *ts = TURNSTILE_NULL;
989 	thread_t cleanup_thread = THREAD_NULL;
990 
991 	if (ull == NULL) {
992 		ret = ENOENT;
993 		goto munge_retval;
994 	}
995 	/* ull is locked */
996 
997 	if (opcode != ull->ull_opcode) {
998 		ret = EDOM;
999 		goto out_ull_put;
1000 	}
1001 
1002 	if (set_owner) {
1003 		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
1004 			/*
1005 			 * If the current thread isn't the known owner,
1006 			 * then this wake call was late to the party,
1007 			 * and the kernel already knows who owns the lock.
1008 			 *
1009 			 * This current owner already knows the lock is contended
1010 			 * and will redrive wakes, just bail out.
1011 			 */
1012 			goto out_ull_put;
1013 		}
1014 	} else {
1015 		assert(ull->ull_owner == THREAD_NULL);
1016 	}
1017 
1018 	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
1019 	    TURNSTILE_NULL, TURNSTILE_ULOCK);
1020 	assert(ts != TURNSTILE_NULL);
1021 
1022 	if (flags & ULF_WAKE_THREAD) {
1023 		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
1024 		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
1025 		    wake_thread, THREAD_AWAKENED);
1026 		if (kr != KERN_SUCCESS) {
1027 			assert(kr == KERN_NOT_WAITING);
1028 			ret = EALREADY;
1029 		}
1030 	} else if (flags & ULF_WAKE_ALL) {
1031 		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
1032 		    THREAD_AWAKENED,
1033 		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
1034 	} else if (set_owner) {
1035 		/*
1036 		 * The turnstile waitq is priority ordered,
1037 		 * and will wake up the highest priority waiter
1038 		 * and set it as the inheritor for us.
1039 		 */
1040 		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
1041 		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
1042 		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
1043 	} else {
1044 		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
1045 		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
1046 	}
1047 
1048 	if (set_owner) {
1049 		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1050 		cleanup_thread = ull->ull_owner;
1051 		ull->ull_owner = new_owner;
1052 	}
1053 
1054 	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
1055 
1056 out_ull_put:
1057 	ull_put(ull);
1058 
1059 	if (ts != TURNSTILE_NULL) {
1060 		/* Need to be called after dropping the interlock */
1061 		turnstile_cleanup();
1062 	}
1063 
1064 	if (cleanup_thread != THREAD_NULL) {
1065 		thread_deallocate(cleanup_thread);
1066 	}
1067 
1068 munge_retval:
1069 	if (wake_thread != THREAD_NULL) {
1070 		thread_deallocate(wake_thread);
1071 	}
1072 
1073 	return ret;
1074 }
1075 
1076 void
kdp_ulock_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)1077 kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
1078 {
1079 	ull_t *ull = EVENT_TO_ULOCK(event);
1080 
1081 	zone_require(ull_zone->kt_zv.zv_zone, ull);
1082 
1083 	switch (ull->ull_opcode) {
1084 	case UL_UNFAIR_LOCK:
1085 	case UL_UNFAIR_LOCK64_SHARED:
1086 		waitinfo->owner   = thread_tid(ull->ull_owner);
1087 		waitinfo->context = ull->ull_key.ulk_addr;
1088 		break;
1089 	case UL_COMPARE_AND_WAIT:
1090 	case UL_COMPARE_AND_WAIT64:
1091 	case UL_COMPARE_AND_WAIT_SHARED:
1092 	case UL_COMPARE_AND_WAIT64_SHARED:
1093 		waitinfo->owner   = 0;
1094 		waitinfo->context = ull->ull_key.ulk_addr;
1095 		break;
1096 	default:
1097 		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
1098 		break;
1099 	}
1100 	return;
1101 }
1102