/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/assert.h>
#include <kern/epoch_sync.h>
#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>

#include <os/atomic.h>
#include <os/hash.h>
#include <os/overflow.h>

#include <sys/kdebug.h>

#include <stdint.h>
#include <stdbool.h>

#include <kern/block_hint.h>

#define ES_INVALID_ID UINT64_MAX

/*
 * Combine space and ID into a unique identifier.
 * The ID passed in must leave the top byte clear.
 */
#define ES_UNIQUE_ID(space, id) ({                                        \
	assert3u(id >> 56, ==, 0); /* IDs must have the top byte clear */ \
	(id | ((uint64_t)space << 56));                                   \
})

static LCK_GRP_DECLARE(esync_lckgrp, "esync");
os_refgrp_decl(static, esync_refgrp, "esync", NULL);

typedef struct {
	uint64_t          es_id;            /* Synchronization ID. */
	struct turnstile *es_turnstile;     /* Associated turnstile. */
	esync_policy_t    es_policy;        /* Determines turnstile policy. */
	lck_spin_t        es_lock;          /* Interlock. */
	os_refcnt_t       es_refcnt;        /* Reference count for lifecycle. */
	queue_chain_t     es_link;          /* Link for hash table. */
} esync_t;

#pragma mark - Hash Table Implementation -

static LCK_GRP_DECLARE(ht_lck_grp, "ht");

typedef struct {
	queue_head_t  htb_head;
	lck_spin_t    htb_lock;
} ht_bucket_t;

typedef struct ht {
	const uint32_t  ht_size;
	ht_bucket_t    *ht_bucket;
} ht_t;

/*
 * Eventually it would be better to have "clients" just dynamically allocate
 * these as needed and not only support two static ID spaces.
 */

#define NBUCKETS_QUEUE 512
static ht_t exclaves_queue_ht = {
	.ht_size = NBUCKETS_QUEUE,
	.ht_bucket = &(ht_bucket_t[NBUCKETS_QUEUE]){}[0],
};

#define NBUCKETS_THREAD 64
static ht_t exclaves_thread_ht = {
	.ht_size = NBUCKETS_THREAD,
	.ht_bucket = &(ht_bucket_t[NBUCKETS_THREAD]){}[0],
};

#if DEVELOPMENT || DEBUG

#define NBUCKETS_TEST 8
static ht_t esync_test_ht = {
	.ht_size = NBUCKETS_TEST,
	.ht_bucket = &(ht_bucket_t[NBUCKETS_TEST]){}[0],
};
#endif /* DEVELOPMENT || DEBUG */


static ht_t *
space_to_ht(esync_space_t space)
{
	switch (space) {
	case ESYNC_SPACE_EXCLAVES_Q:
		return &exclaves_queue_ht;

	case ESYNC_SPACE_EXCLAVES_T:
		return &exclaves_thread_ht;

#if DEVELOPMENT || DEBUG
	case ESYNC_SPACE_TEST:
		return &esync_test_ht;
#endif /* DEVELOPMENT || DEBUG */

	default:
		panic("unknown epoch sync space");
	}
}

static __startup_func void
ht_startup_init(ht_t *ht)
{
	for (uint32_t i = 0; i < ht->ht_size; i++) {
		queue_init(&ht->ht_bucket[i].htb_head);
		lck_spin_init(&ht->ht_bucket[i].htb_lock, &ht_lck_grp, NULL);
	}
}
STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &exclaves_queue_ht);
STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &exclaves_thread_ht);

static inline ht_bucket_t *
ht_get_bucket(ht_t *ht, const uint64_t key)
{
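	/* ht_size must be a power of two for the index mask below to work. */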
	assert3u((ht->ht_size & (ht->ht_size - 1)), ==, 0);

	const uint32_t idx = os_hash_jenkins(&key, sizeof(key)) & (ht->ht_size - 1);
	return &ht->ht_bucket[idx];
}

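/*
 * Insert 'new_value' under 'key' unless an entry with that key is already
 * present. Returns the winning entry with its es_lock held; if an existing
 * entry won, 'new_value' is left untouched for the caller to release.
 */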
static esync_t *
ht_put(ht_t *ht, const uint64_t key, esync_t *new_value)
{
	/* 'new_value' shouldn't be part of an existing queue. */
	assert3p(new_value->es_link.next, ==, NULL);
	assert3p(new_value->es_link.prev, ==, NULL);

	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);

	esync_t *value = NULL;
	esync_t *elem = NULL;
	qe_foreach_element(elem, &bucket->htb_head, es_link) {
		if (elem->es_id != key) {
			continue;
		}

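		/*
		 * Recheck es_id under es_lock: esync_put() invalidates
		 * es_id before unlinking a dying entry, so a match found
		 * above may have been torn down in the meantime.
		 */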
		lck_spin_lock_grp(&elem->es_lock, &esync_lckgrp);
		if (elem->es_id == key) {
			value = elem;
			break;
		}
		lck_spin_unlock(&elem->es_lock);
	}

	if (value == NULL) {
		value = new_value;
		lck_spin_lock_grp(&value->es_lock, &esync_lckgrp);
		enqueue(&bucket->htb_head, &value->es_link);
	}

	lck_spin_unlock(&bucket->htb_lock);

	return value;
}

static void
ht_remove(ht_t *ht, const uint64_t key, esync_t *value)
{
	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);
	remqueue(&value->es_link);
	lck_spin_unlock(&bucket->htb_lock);

	assert3p(value->es_link.next, ==, NULL);
	assert3p(value->es_link.prev, ==, NULL);
}

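/*
 * Look up 'key'. Returns the matching entry with its es_lock held, or NULL
 * if no entry exists.
 */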
static esync_t *
ht_get(ht_t *ht, const uint64_t key)
{
	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);

	esync_t *value = NULL;
	esync_t *elem = NULL;
	qe_foreach_element(elem, &bucket->htb_head, es_link) {
		if (elem->es_id != key) {
			continue;
		}

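		/* Recheck under es_lock; see the comment in ht_put(). */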
		lck_spin_lock_grp(&elem->es_lock, &esync_lckgrp);
		if (elem->es_id == key) {
			value = elem;
			break;
		}
		lck_spin_unlock(&elem->es_lock);
	}

	lck_spin_unlock(&bucket->htb_lock);

	return value;
}

#pragma mark - Epoch Sync Implementation -

/*
 * Allocate a backing object.
 */
static esync_t *
esync_alloc(const uint64_t id, const esync_policy_t policy)
{
	assert3u(id, !=, ES_INVALID_ID);

	esync_t *sync = kalloc_type(esync_t, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	assert3p(sync, !=, NULL);

	sync->es_id = id;
	sync->es_turnstile = TURNSTILE_NULL;
	sync->es_policy = policy;

	lck_spin_init(&sync->es_lock, &esync_lckgrp, NULL);

	os_ref_init_count(&sync->es_refcnt, &esync_refgrp, 1);

	return sync;
}

/*
 * Free a backing object.
 */
static void
esync_free(esync_t *sync)
{
	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_NOTOWNED);
	assert3p(sync->es_turnstile, ==, TURNSTILE_NULL);
	assert3u(os_ref_get_count(&sync->es_refcnt), ==, 0);

	lck_spin_destroy(&sync->es_lock, &esync_lckgrp);

	kfree_type(esync_t, sync);
}

/*
 * Stop using 'sync': drop its refcount and, if this was the last user,
 * remove it from the hash table and free it. If 'to_be_freed' is not NULL,
 * it is an unused, never-inserted entry and is freed as well.
 * Called with the object locked.
 */
static void
esync_put(ht_t *ht, esync_t *sync, esync_t *to_be_freed)
{
	os_ref_count_t cnt = 0;

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

	/* The last owner will remove it from the hash table. */
	cnt = os_ref_get_count(&sync->es_refcnt);
	if (cnt == 2) {
		/*
		 * Make sure no other thread will match it during the window
		 * where the lock is dropped but before it's been removed from
		 * the hash table (lookups are protected by es_lock as called
		 * from esync_get()/esync_wake()).
		 */
		const uint64_t id = sync->es_id;
		sync->es_id = ES_INVALID_ID;
		lck_spin_unlock(&sync->es_lock);

		ht_remove(ht, id, sync);

		/* Drop the ref associated with the hash table. */
		(void) os_ref_release(&sync->es_refcnt);

		/* Drop the final refcnt and free it. */
		cnt = os_ref_release(&sync->es_refcnt);
		assert3u(cnt, ==, 0);

		/*
		 * Before freeing (and potentially taking another lock), call
		 * turnstile_cleanup().
		 */
		turnstile_cleanup();
		esync_free(sync);
	} else {
		cnt = os_ref_release_locked(&sync->es_refcnt);
		assert3u(cnt, >=, 2);
		lck_spin_unlock(&sync->es_lock);
		turnstile_cleanup();
	}

	/* An unused entry, free it. */
	if (to_be_freed != NULL) {
		cnt = os_ref_release(&to_be_freed->es_refcnt);
		assert3u(cnt, ==, 0);
		esync_free(to_be_freed);
	}
}

/*
 * Get an object associated with 'id'. If there isn't one already, allocate
 * one and insert it.
 * Returns with the object locked and a +1 on the refcount.
 */
static esync_t *
esync_get(ht_t *ht, const uint64_t id, const esync_policy_t policy,
    esync_t **const to_be_freed)
{
	esync_t *new = esync_alloc(id, policy);
	esync_t *sync = ht_put(ht, id, new);

	/*
	 * Check whether the newly allocated entry was the one inserted. If it
	 * was, ht_put() consumed its refcount and there's nothing extra to
	 * clean up. If an existing entry won, 'new' must be freed later: the
	 * spinlock is held at this point, so cleanup can't happen here.
	 */
	*to_be_freed = (sync != new) ? new : NULL;

	/*
	 * The policy of the sync object should always match, i.e. the
	 * consumer of the esync interfaces must guarantee that all waiters
	 * use the same policy.
	 */
	assert3u(sync->es_policy, ==, policy);

	os_ref_retain_locked(&sync->es_refcnt);

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);
	return sync;
}

/*
 * Update the epoch counter with a new epoch.
 * Returns true if the new epoch was newer than or equal to the existing
 * epoch.
 */
static bool
esync_update_epoch(const uint64_t epoch, os_atomic(uint64_t) *counter)
{
	uint64_t old, new;

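	/*
	 * os_atomic_rmw_loop() evaluates to non-zero only if the store was
	 * performed, i.e. only if 'epoch' >= the counter's current value.
	 */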
	return os_atomic_rmw_loop(counter, old, new, acq_rel, {
		if (old > epoch) {
		        os_atomic_rmw_loop_give_up();
		}
		new = epoch;
	}) == 1;
}


static void
esync_wait_set_block_hint(const esync_space_t space, const esync_policy_t policy)
{
	thread_t self = current_thread();

	switch (space) {
	case ESYNC_SPACE_EXCLAVES_Q:
	case ESYNC_SPACE_EXCLAVES_T:
		switch (policy) {
		case ESYNC_POLICY_KERNEL:
			thread_set_pending_block_hint(self, kThreadWaitExclaveCore);
			return;
		case ESYNC_POLICY_USER:
			thread_set_pending_block_hint(self, kThreadWaitExclaveKit);
			return;
		default:
			return;
		}
	default:
		return;
	}
}

void
kdp_esync_find_owner(struct waitq *waitq, event64_t event,
    thread_waitinfo_t *waitinfo)
{
	const esync_t *esync = (esync_t *)event;
	waitinfo->context = esync->es_id;

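	/* The turnstile can't be examined safely while the waitq is held. */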
	if (waitq_held(waitq)) {
		return;
	}

	const struct turnstile *turnstile = waitq_to_turnstile(waitq);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}

/*
 * Block until esync_wake() is called on this id.
 * The epoch is incremented by the client on wakes. If the epoch is stale,
 * return immediately without blocking.
 * Can allocate a new epoch synchronization object if needed.
 * "owner" is only used if the epoch is fresh.
 */
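/*
 * Illustrative sketch of the expected client protocol (see test_lock() and
 * test_unlock() below for a concrete example):
 *
 *	waiter:                            waker:
 *	  epoch = read client epoch          bump client epoch
 *	  publish intent to wait             esync_wake(space, id, epoch, ...)
 *	  esync_wait(space, id, epoch, ...)
 *
 * A wake that races ahead of a wait bumps the epoch first, so the waiter's
 * epoch is stale and esync_wait() returns THREAD_NOT_WAITING rather than
 * blocking and missing the wake.
 */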
wait_result_t
esync_wait(esync_space_t space, const uint64_t id, const uint64_t epoch,
    os_atomic(uint64_t) *counter, const ctid_t owner_ctid,
    const esync_policy_t policy, const wait_interrupt_t interruptible)
{
	assert3u(id, !=, ES_INVALID_ID);
	const uint64_t unique_id = ES_UNIQUE_ID(space, id);

	ht_t *ht = space_to_ht(space);

	esync_t *to_be_freed = NULL;
	esync_t *sync = esync_get(ht, unique_id, policy, &to_be_freed);

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

	const bool fresh_epoch = esync_update_epoch(epoch, counter);
	if (!fresh_epoch) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAIT_STALE), unique_id, epoch, NULL);
		esync_put(ht, sync, to_be_freed);
		return THREAD_NOT_WAITING;
	}

	/*
	 * It is safe to look up the thread from the ctid here as the lock is
	 * held and the epoch is fresh.
	 * ctid and hence tid may be 0.
	 */
	thread_t owner_thread = ctid_get_thread(owner_ctid);
	uint64_t tid = thread_tid(owner_thread);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC, MACH_EPOCH_SYNC_WAIT) |
	    DBG_FUNC_START, unique_id, epoch, tid);

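	/* Choose the turnstile type that matches the sync policy. */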
	assert(sync->es_policy == ESYNC_POLICY_KERNEL ||
	    sync->es_policy == ESYNC_POLICY_USER);
	turnstile_type_t tt = sync->es_policy == ESYNC_POLICY_KERNEL ?
	    TURNSTILE_EPOCH_KERNEL : TURNSTILE_EPOCH_USER;
	struct turnstile *ts = turnstile_prepare((uintptr_t)sync,
	    &sync->es_turnstile, TURNSTILE_NULL, tt);

	/*
	 * owner_thread may not be set; that's fine, the inheritor will be
	 * cleared.
	 */
	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	esync_wait_set_block_hint(space, policy);

	wait_result_t wr = waitq_assert_wait64(&ts->ts_waitq,
	    CAST_EVENT64_T(sync), interruptible, TIMEOUT_WAIT_FOREVER);

	lck_spin_unlock(&sync->es_lock);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		wr = thread_block(THREAD_CONTINUE_NULL);
	}

	lck_spin_lock(&sync->es_lock);

	turnstile_complete((uintptr_t)sync, &sync->es_turnstile, NULL, tt);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC, MACH_EPOCH_SYNC_WAIT) |
	    DBG_FUNC_END, wr);

	/* Drops the lock, refcount and possibly frees sync. */
	esync_put(ht, sync, to_be_freed);

	return wr;
}

/*
 * Wake up a waiter. Pre-posted wakes (wakes that happen when there is no
 * active waiter) just return. The epoch is always updated.
 */
kern_return_t
esync_wake(esync_space_t space, const uint64_t id, const uint64_t epoch,
    os_atomic(uint64_t) *counter, const esync_wake_mode_t mode,
    const ctid_t ctid)
{
	assert3u(id, !=, ES_INVALID_ID);
	assert(
		mode == ESYNC_WAKE_ONE ||
		mode == ESYNC_WAKE_ALL ||
		mode == ESYNC_WAKE_ONE_WITH_OWNER ||
		mode == ESYNC_WAKE_THREAD);

	ht_t *ht = space_to_ht(space);
	const uint64_t unique_id = ES_UNIQUE_ID(space, id);

	kern_return_t kr = KERN_FAILURE;

	/*
	 * Update the epoch regardless of whether there's a waiter or not. (If
	 * there's no waiter, there will be no sync object.)
	 * The epoch is read by waiters under the object lock to ensure that
	 * they don't miss a wake.
	 */
	(void) esync_update_epoch(epoch, counter);

	esync_t *sync = ht_get(ht, unique_id);
	if (sync == NULL) {
		/* Drop pre-posted WAKEs. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_NO_WAITERS), unique_id,
		    epoch, mode);
		return KERN_NOT_WAITING;
	}
	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

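	/* Take a ref for the duration of the wake; esync_put() drops it. */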
	os_ref_retain_locked(&sync->es_refcnt);

	assert(sync->es_policy == ESYNC_POLICY_KERNEL ||
	    sync->es_policy == ESYNC_POLICY_USER);
	turnstile_type_t tt = sync->es_policy == ESYNC_POLICY_KERNEL ?
	    TURNSTILE_EPOCH_KERNEL : TURNSTILE_EPOCH_USER;
	struct turnstile *ts = turnstile_prepare((uintptr_t)sync,
	    &sync->es_turnstile, TURNSTILE_NULL, tt);

	/* ctid and hence tid may be 0. */
	thread_t thread = ctid_get_thread(ctid);
	uint64_t tid = thread_tid(thread);

	switch (mode) {
	case ESYNC_WAKE_ONE:
		/* The woken thread is the new inheritor. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ONE), unique_id, epoch);
		kr = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
		break;

	case ESYNC_WAKE_ALL:
		/* The inheritor is cleared. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ALL), unique_id, epoch);
		kr = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
		break;

	case ESYNC_WAKE_ONE_WITH_OWNER:
		/* The specified thread is the new inheritor (may be NULL). */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ONE_WITH_OWNER), unique_id, epoch, tid);
		kr = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
		turnstile_update_inheritor(ts, thread,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		break;

	case ESYNC_WAKE_THREAD:
		/* No new inheritor. Wake the specified thread (if waiting). */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_THREAD), unique_id, epoch, tid);
		kr = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    thread, WAITQ_WAKEUP_DEFAULT);
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

	turnstile_complete((uintptr_t)sync, &sync->es_turnstile, NULL, tt);

	/* Drops the lock, refcount and possibly frees sync. */
	esync_put(ht, sync, NULL);

	assert(kr == KERN_SUCCESS || kr == KERN_NOT_WAITING);
	return kr;
}

#if DEVELOPMENT || DEBUG

#pragma mark - Tests -

/* For SYSCTL_TEST_REGISTER. */
#include <kern/startup.h>

/*
 * Delay for a random amount of time, up to ~1/2ms.
 */
static void
random_delay(void)
{
	extern void read_random(void* buffer, u_int numBytes);
	uint64_t random = 0;
	read_random(&random, sizeof(random));
	delay(random % 512);
}

/*
 * Basic mutex-like primitive to test the epoch synchronization primitives.
 */

/* Counter for the "client"-side. */
static os_atomic(uint64_t) client_counter = 0;

/* Counter for the "server"-side. */
static os_atomic(uint64_t) server_counter = 0;

/*
 * The lock object stores 0 when not held and the thread's CTID when held.
 * If there's an active waiter, bit 32 is set.
 */
#define OWNER(x) ((x) & ((1ull << 32) - 1))
#define WAITER_BIT (1ull << 32)

/* The "mutex" itself. */
static uint64_t test_mutex;


STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &esync_test_ht);

/*
 * Grab the lock.
 * If already held, set a waiters bit and call esync_wait.
 * On acquisition, if there are still waiters, set the waiters bit when taking
 * the lock.
 */
static void
test_lock(uint64_t *lock)
{
	/* Counter to keep track of the number of active waiters. */
	static os_atomic(uint32_t) test_waiter_count = 0;

	const ctid_t ctid = thread_get_ctid(current_thread());
	uint64_t old = 0;
	uint64_t new = ctid;

	while (true) {
		/* Try to grab the lock. */
		if (os_atomic_cmpxchgv(lock, 0, new, &old, relaxed) == 1) {
			return;
		}

		/* Failed to grab the lock, add a waiter bit and wait. */
		do {
			uint64_t epoch = os_atomic_load(&client_counter, acquire);

			if (os_atomic_cmpxchgv(lock, old, old | WAITER_BIT, &old, relaxed) == 1) {
				os_atomic_inc(&test_waiter_count, acq_rel);

				random_delay();
				const wait_result_t wr = esync_wait(ESYNC_SPACE_TEST, (uint32_t)(uintptr_t)lock, epoch,
				    &server_counter, OWNER(old), ESYNC_POLICY_KERNEL, THREAD_UNINT);
				assert(wr == THREAD_NOT_WAITING || wr == THREAD_AWAKENED);
				random_delay();

				/*
				 * When acquiring the lock, if there are waiters make sure to
				 * set the waiters bit.
				 */
				new = ctid;
				if (os_atomic_dec(&test_waiter_count, acq_rel) != 0) {
					new |= WAITER_BIT;
				}
				break;
			}
		} while (old != 0);
	}
}

/*
 * Drop the lock.
 */
static void
test_unlock(uint64_t *lock)
{
	const ctid_t ctid = thread_get_ctid(current_thread());

	/* Drop the lock. */
	uint64_t old = os_atomic_xchg(lock, 0, relaxed);
	assert3u(OWNER(old), ==, ctid);

	uint64_t epoch = os_atomic_inc(&client_counter, release);

	if ((old & WAITER_BIT) != 0) {
		random_delay();
		(void) esync_wake(ESYNC_SPACE_TEST, (uint32_t)(uintptr_t)lock, epoch,
		    &server_counter, ESYNC_WAKE_ONE, 0);
		random_delay();
	}
}


/* Counter to keep track of completed test threads. */
static os_atomic(uint64_t) test_complete_count = 0;

static void
test_lock_unlock(__unused void *arg, __unused int a)
{
	for (int c = 0; c < 10; c++) {
		test_lock(&test_mutex);
		random_delay();
		test_unlock(&test_mutex);
	}

	os_atomic_inc(&test_complete_count, relaxed);
}

static LCK_MTX_DECLARE(esync_test_mtx, &esync_lckgrp);

/* Wait then wake. */
static int
esync_test(int64_t count, int64_t *out)
{
	kern_return_t ret;
	thread_t *thread = kalloc_type(thread_t, count,
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);

	printf("%s: STARTING\n", __func__);

	lck_mtx_lock(&esync_test_mtx);

	for (int64_t i = 0; i < count; i++) {
		ret = kernel_thread_start_priority(test_lock_unlock, NULL,
		    BASEPRI_DEFAULT, &thread[i]);
		assert3u(ret, ==, KERN_SUCCESS);
	}

	/* Wait for completion. */
	while (test_complete_count != count) {
		delay(100000);
	}

	os_atomic_store(&test_complete_count, 0, relaxed);

	/* Drop the thread refs. */
	for (int i = 0; i < count; i++) {
		thread_deallocate(thread[i]);
	}

	os_atomic_store(&server_counter, 0, relaxed);
	os_atomic_store(&client_counter, 0, relaxed);

	lck_mtx_unlock(&esync_test_mtx);

	printf("%s: SUCCESS\n", __func__);

	kfree_type(thread_t, count, thread);

	*out = 1;

	return 0;
}

SYSCTL_TEST_REGISTER(esync_test, esync_test);

/*
 * Block the caller on an interruptible wait. The thread must be terminated
 * in order for this test to return.
 */
static int
esync_test_wait(__unused int64_t in, __unused int64_t *out)
{
	os_atomic(uint64_t) counter = 0;

	printf("%s: STARTING\n", __func__);

	wait_result_t wr = esync_wait(ESYNC_SPACE_TEST, 0, 0, &counter, 0,
	    ESYNC_POLICY_USER, THREAD_INTERRUPTIBLE);
	if (wr != THREAD_INTERRUPTED) {
		printf("%s: FAILURE - unexpected wait result (%d)\n", __func__, wr);
		*out = -1;
		return 0;
	}

	printf("%s: SUCCESS\n", __func__);

	*out = 1;

	return 0;
}

SYSCTL_TEST_REGISTER(esync_test_wait, esync_test_wait);

#endif /* DEVELOPMENT || DEBUG */