xref: /xnu-11417.140.69/osfmk/kern/epoch_sync.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <kern/assert.h>
30 #include <kern/epoch_sync.h>
31 #include <kern/kalloc.h>
32 #include <kern/locks.h>
33 #include <kern/sched_prim.h>
34 #include <kern/turnstile.h>
35 
36 #include <os/atomic.h>
37 #include <os/hash.h>
38 #include <os/overflow.h>
39 
40 #include <sys/kdebug.h>
41 
42 #include <stdint.h>
43 #include <stdbool.h>
44 
45 #include <kern/block_hint.h>
46 
47 #define ES_INVALID_ID UINT64_MAX
48 
/*
 * Combine space and ID into a unique identifier.
 * The ID passed in must leave the top byte clear.
 * NOTE: 'id' is evaluated twice; callers must pass side-effect-free
 * expressions. Arguments are parenthesized to avoid precedence surprises
 * with compound argument expressions.
 */
#define ES_UNIQUE_ID(space, id) ({                                          \
	assert3u((id) >> 56, ==, 0); /* IDs must have the top byte clear */ \
	((id) | ((uint64_t)(space) << 56));                                 \
})
57 
58 static LCK_GRP_DECLARE(esync_lckgrp, "esync");
59 os_refgrp_decl(static, esync_refgrp, "esync", NULL);
60 
/* Per-ID synchronization object, hashed by es_id and refcounted. */
typedef struct {
	uint64_t          es_id;            /* Synchronization ID; ES_INVALID_ID while being torn down. */
	struct turnstile *es_turnstile;     /* Associated turnstile (non-NULL only around waits/wakes). */
	esync_policy_t    es_policy;        /* Determines turnstile policy (kernel vs. user). */
	lck_spin_t        es_lock;          /* Interlock protecting the fields above. */
	os_refcnt_t       es_refcnt;        /* Reference count for lifecycle (hash table holds one ref). */
	queue_chain_t     es_link;          /* Link for hash table bucket queue. */
} esync_t;
69 
70 #pragma mark - Hash Table Implementation -
71 
72 static LCK_GRP_DECLARE(ht_lck_grp, "ht");
73 
/* A single hash bucket: a queue of esync_t entries guarded by a spinlock. */
typedef struct {
	queue_head_t  htb_head;             /* esync_t entries linked via es_link. */
	lck_spin_t    htb_lock;             /* Protects htb_head. */
} ht_bucket_t;

/* A fixed-size hash table. ht_size must be a power of two (see ht_get_bucket). */
typedef struct ht {
	const uint32_t  ht_size;            /* Number of buckets. */
	ht_bucket_t    *ht_bucket;          /* Array of ht_size buckets. */
} ht_t;
83 
/*
 * Eventually it would be better to have "clients" just dynamically allocate
 * these as needed and not only support two static ID spaces.
 */

/* Hash table for the exclaves queue ID space. */
#define NBUCKETS_QUEUE 512
static ht_t exclaves_queue_ht = {
	.ht_size = NBUCKETS_QUEUE,
	/* Bucket storage is a statically allocated compound literal array. */
	.ht_bucket = &(ht_bucket_t[NBUCKETS_QUEUE]){}[0],
};

/* Hash table for the exclaves thread ID space. */
#define NBUCKETS_THREAD 64
static ht_t exclaves_thread_ht = {
	.ht_size = NBUCKETS_THREAD,
	.ht_bucket = &(ht_bucket_t[NBUCKETS_THREAD]){}[0],
};

#if DEVELOPMENT || DEBUG

/* Hash table for the test-only ID space (see the tests in this file). */
#define NBUCKETS_TEST 8
static ht_t esync_test_ht = {
	.ht_size = NBUCKETS_TEST,
	.ht_bucket = &(ht_bucket_t[NBUCKETS_TEST]){}[0],
};
#endif /* DEVELOPMENT || DEBUG */
109 
110 
111 static ht_t *
space_to_ht(esync_space_t space)112 space_to_ht(esync_space_t space)
113 {
114 	switch (space) {
115 	case ESYNC_SPACE_EXCLAVES_Q:
116 		return &exclaves_queue_ht;
117 
118 	case ESYNC_SPACE_EXCLAVES_T:
119 		return &exclaves_thread_ht;
120 
121 #if DEVELOPMENT || DEBUG
122 	case ESYNC_SPACE_TEST:
123 		return &esync_test_ht;
124 #endif /* DEVELOPMENT || DEBUG */
125 
126 	default:
127 		panic("unknown epoch sync space");
128 	}
129 }
130 
/*
 * Boot-time initialization of a hash table's buckets (queues + spinlocks).
 */
static __startup_func void
ht_startup_init(ht_t *ht)
{
	for (uint32_t i = 0; i < ht->ht_size; i++) {
		queue_init(&ht->ht_bucket[i].htb_head);
		lck_spin_init(&ht->ht_bucket[i].htb_lock, &ht_lck_grp, NULL);
	}
}
/* Initialize each static ID-space table once the lock subsystem is up. */
STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &exclaves_queue_ht);
STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &exclaves_thread_ht);
141 
142 static inline ht_bucket_t *
ht_get_bucket(ht_t * ht,const uint64_t key)143 ht_get_bucket(ht_t *ht, const uint64_t key)
144 {
145 	assert3u((ht->ht_size & (ht->ht_size - 1)), ==, 0);
146 
147 	const uint32_t idx = os_hash_jenkins(&key, sizeof(key)) & (ht->ht_size - 1);
148 	return &ht->ht_bucket[idx];
149 }
150 
/*
 * Look up 'key', inserting 'new_value' if no live entry with that ID exists.
 * Returns the matched (or newly inserted) entry with its es_lock HELD.
 * 'new_value' is consumed only if it was inserted; the caller detects this by
 * comparing the return value against 'new_value' and must free an unused one.
 */
static esync_t *
ht_put(ht_t *ht, const uint64_t key, esync_t *new_value)
{
	/* 'new_value' shouldn't be part of an existing queue. */
	assert3p(new_value->es_link.next, ==, NULL);
	assert3p(new_value->es_link.prev, ==, NULL);

	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);

	esync_t *value = NULL;
	esync_t *elem = NULL;
	qe_foreach_element(elem, &bucket->htb_head, es_link) {
		/* Unlocked peek at the ID to cheaply skip non-matching entries. */
		if (elem->es_id != key) {
			continue;
		}

		/*
		 * Re-check under es_lock: a concurrent esync_put() may have
		 * invalidated the ID (set it to ES_INVALID_ID) while tearing
		 * the entry down.
		 */
		lck_spin_lock_grp(&elem->es_lock, &esync_lckgrp);
		if (elem->es_id == key) {
			value = elem;
			break;
		}
		lck_spin_unlock(&elem->es_lock);
	}

	/* No live match - insert the caller-supplied entry, locked. */
	if (value == NULL) {
		value = new_value;
		lck_spin_lock_grp(&value->es_lock, &esync_lckgrp);
		enqueue(&bucket->htb_head, &value->es_link);
	}

	lck_spin_unlock(&bucket->htb_lock);

	/* Returned entry's es_lock is held; the caller must release it. */
	return value;
}
187 
/*
 * Remove 'value' from the hash table.
 * Called without value->es_lock held: the caller has already invalidated
 * es_id so no lookup can match the entry; the bucket lock alone protects the
 * queue manipulation.
 */
static void
ht_remove(ht_t *ht, const uint64_t key, esync_t *value)
{
	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);
	remqueue(&value->es_link);
	lck_spin_unlock(&bucket->htb_lock);

	/* remqueue is expected to have cleared the linkage. */
	assert3p(value->es_link.next, ==, NULL);
	assert3p(value->es_link.prev, ==, NULL);
}
200 
/*
 * Look up 'key' in the hash table.
 * Returns the matching entry with its es_lock HELD, or NULL if not found.
 */
static esync_t *
ht_get(ht_t *ht, const uint64_t key)
{
	ht_bucket_t *bucket = ht_get_bucket(ht, key);

	lck_spin_lock_grp(&bucket->htb_lock, &ht_lck_grp);

	esync_t *value = NULL;
	esync_t *elem = NULL;
	qe_foreach_element(elem, &bucket->htb_head, es_link) {
		/* Unlocked peek at the ID to cheaply skip non-matching entries. */
		if (elem->es_id != key) {
			continue;
		}

		/*
		 * Re-check under es_lock: a concurrent esync_put() may have
		 * invalidated the ID while tearing the entry down.
		 */
		lck_spin_lock_grp(&elem->es_lock, &esync_lckgrp);
		if (elem->es_id == key) {
			value = elem;
			break;
		}
		lck_spin_unlock(&elem->es_lock);
	}

	lck_spin_unlock(&bucket->htb_lock);

	return value;
}
227 
228 #pragma mark - Epoch Sync Implementation -
229 
230 /*
231  * Allocate a backing object.
232  */
233 static esync_t *
esync_alloc(const uint64_t id,const esync_policy_t policy)234 esync_alloc(const uint64_t id, const esync_policy_t policy)
235 {
236 	assert3u(id, !=, ES_INVALID_ID);
237 
238 	esync_t *sync = kalloc_type(esync_t, Z_WAITOK | Z_ZERO | Z_NOFAIL);
239 	assert3p(sync, !=, NULL);
240 
241 	sync->es_id = id;
242 	sync->es_turnstile = TURNSTILE_NULL;
243 	sync->es_policy = policy;
244 
245 	lck_spin_init(&sync->es_lock, &esync_lckgrp, NULL);
246 
247 	os_ref_init_count(&sync->es_refcnt, &esync_refgrp, 1);
248 
249 	return sync;
250 }
251 
/*
 * Free a backing object.
 * The object must be unlocked, have no turnstile attached and a refcount of
 * zero.
 */
static void
esync_free(esync_t *sync)
{
	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_NOTOWNED);
	assert3p(sync->es_turnstile, ==, TURNSTILE_NULL);
	assert3u(os_ref_get_count(&sync->es_refcnt), ==, 0);

	/* Destroy the interlock before returning the memory. */
	lck_spin_destroy(&sync->es_lock, &esync_lckgrp);

	kfree_type(esync_t, sync);
}
266 
/*
 * Stop using 'sync'. Drop the ref count and possibly remove it from the hash
 * table and free it.  Free up an unused entry if not NULL.
 * Called with the object locked; always returns with the lock dropped (the
 * object may have been freed).
 */
static void
esync_put(ht_t *ht, esync_t *sync, esync_t *to_be_freed)
{
	os_ref_count_t cnt = 0;

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

	/*
	 * The last owner will remove it from the hash table.
	 * A count of 2 means only this caller's reference and the hash
	 * table's own reference remain.
	 */
	cnt = os_ref_get_count(&sync->es_refcnt);
	if (cnt == 2) {
		/*
		 * Make sure no other thread will match it during the window
		 * where the lock is dropped but before it's been removed from
		 * the hash table (lookups are protected by es_lock as called
		 * from esync_acquire).
		 */
		const uint64_t id = sync->es_id;
		sync->es_id = ES_INVALID_ID;
		lck_spin_unlock(&sync->es_lock);

		ht_remove(ht, id, sync);

		/* Drop the ref associated with the hash table. */
		(void) os_ref_release(&sync->es_refcnt);

		/* Drop the final refcnt and free it. */
		os_ref_release_last(&sync->es_refcnt);

		/*
		 * Before freeing (and potentially taking another lock), call
		 * turnstile_cleanup().
		 */
		turnstile_cleanup();
		esync_free(sync);
	} else {
		cnt = os_ref_release_locked(&sync->es_refcnt);
		assert3u(cnt, >=, 2);
		lck_spin_unlock(&sync->es_lock);
		turnstile_cleanup();
	}

	/* An unused entry (pre-allocated but never inserted), free it. */
	if (to_be_freed != NULL) {
		os_ref_release_last(&to_be_freed->es_refcnt);
		esync_free(to_be_freed);
	}
}
319 
/*
 * Get an object associated with 'id'. If there isn't one already, allocate one
 * and insert it.
 * Returns with the object locked and a +1 on the refcount.
 * '*to_be_freed' is set to the speculatively allocated object if it turned
 * out not to be needed; the caller must hand it to esync_put() for freeing
 * once the spinlock can be dropped.
 */
static esync_t *
esync_get(ht_t *ht, const uint64_t id, const esync_policy_t policy,
    esync_t **const to_be_freed)
{
	esync_t *new = esync_alloc(id, policy);
	esync_t *sync = ht_put(ht, id, new);

	/*
	 * See if the newly allocated entry was inserted. If so, then there's
	 * nothing extra to clean up later (in case cleanup is needed, it must
	 * be done later as the spinlock is held at this point).
	 * ht_put consumes the refcount of new if the entry was inserted.
	 */
	*to_be_freed = (sync != new) ? new : NULL;

	/*
	 * The policy of the sync object should always match. i.e. the
	 * consumer of the esync interfaces must guarantee that all waiters use
	 * the same policy.
	 */
	assert3u(sync->es_policy, ==, policy);

	os_ref_retain_locked(&sync->es_refcnt);

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);
	return sync;
}
352 
/*
 * Update the epoch counter with a new epoch.
 * Returns true if the epoch was newer or equal to the existing epoch (the
 * store happened); false means the supplied epoch is stale and the counter
 * was left untouched.
 */
static bool
esync_update_epoch(const uint64_t epoch, os_atomic(uint64_t) *counter)
{
	uint64_t old, new;

	/* The rmw loop yields non-zero only if the store took place. */
	return os_atomic_rmw_loop(counter, old, new, acq_rel, {
		if (old > epoch) {
		        os_atomic_rmw_loop_give_up();
		}
		new = epoch;
	}) == 1;
}
369 
370 
371 static void
esync_wait_set_block_hint(const esync_space_t space,const esync_policy_t policy)372 esync_wait_set_block_hint(const esync_space_t space, const esync_policy_t policy)
373 {
374 	thread_t self = current_thread();
375 
376 	switch (space) {
377 	case ESYNC_SPACE_EXCLAVES_Q:
378 	case ESYNC_SPACE_EXCLAVES_T:
379 		switch (policy) {
380 		case ESYNC_POLICY_KERNEL:
381 			thread_set_pending_block_hint(self, kThreadWaitExclaveCore);
382 			return;
383 		case ESYNC_POLICY_USER:
384 			thread_set_pending_block_hint(self, kThreadWaitExclaveKit);
385 			return;
386 		default:
387 			return;
388 		}
389 	default:
390 		return;
391 	}
392 }
393 
/*
 * Debugger (kdp) callback: report which thread a waiter blocked in
 * esync_wait() is waiting on.
 * Runs in debugger context - no locks may be taken, so bail out if the waitq
 * is held and its state can't be read consistently.
 */
void
kdp_esync_find_owner(struct waitq *waitq, event64_t event,
    thread_waitinfo_t *waitinfo)
{
	/* The wait event is the esync object itself (see esync_wait). */
	const esync_t *esync = (esync_t *)event;
	waitinfo->context = esync->es_id;

	if (waitq_held(waitq)) {
		return;
	}

	const struct turnstile *turnstile = waitq_to_turnstile(waitq);
	waitinfo->owner = thread_tid(turnstile->ts_inheritor);
}
408 
/*
 * Block until esync_wake() is called on this id.
 * The epoch is incremented by the client on wakes. If the epoch is stale, then
 * don't block and return immediately.
 * Can allocate a new epoch synchronization object if needed.
 * Will only use "owner" if the epoch is fresh.
 */
wait_result_t
esync_wait(esync_space_t space, const uint64_t id, const uint64_t epoch,
    os_atomic(uint64_t) *counter, const ctid_t owner_ctid,
    const esync_policy_t policy, const wait_interrupt_t interruptible)
{
	assert3u(id, !=, ES_INVALID_ID);
	const uint64_t unique_id = ES_UNIQUE_ID(space, id);

	ht_t *ht = space_to_ht(space);

	/* Look up (or insert) the sync object; returns locked with a +1 ref. */
	esync_t *to_be_freed = NULL;
	esync_t *sync = esync_get(ht, unique_id, policy, &to_be_freed);

	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

	/* A stale epoch means a wake already happened: don't block. */
	const bool fresh_epoch = esync_update_epoch(epoch, counter);
	if (!fresh_epoch) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAIT_STALE), unique_id, epoch, NULL);
		esync_put(ht, sync, to_be_freed);
		return THREAD_NOT_WAITING;
	}

	/*
	 * It is safe to lookup the thread from the ctid here as the lock is
	 * held and the epoch is fresh.
	 * ctid and hence tid may be 0.
	 */
	thread_t owner_thread = ctid_get_thread(owner_ctid);
	uint64_t tid = thread_tid(owner_thread);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC, MACH_EPOCH_SYNC_WAIT) |
	    DBG_FUNC_START, unique_id, epoch, tid);

	/* The turnstile type is fixed by the sync object's policy. */
	assert(sync->es_policy == ESYNC_POLICY_KERNEL ||
	    sync->es_policy == ESYNC_POLICY_USER);
	turnstile_type_t tt = sync->es_policy == ESYNC_POLICY_KERNEL ?
	    TURNSTILE_EPOCH_KERNEL : TURNSTILE_EPOCH_USER;
	struct turnstile *ts = turnstile_prepare((uintptr_t)sync,
	    &sync->es_turnstile, TURNSTILE_NULL, tt);

	/*
	 * owner_thread may not be set, that's fine, the inheritor will be
	 * cleared.
	 */
	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	esync_wait_set_block_hint(space, policy);

	/* The wait event is the sync object (see kdp_esync_find_owner). */
	wait_result_t wr = waitq_assert_wait64(&ts->ts_waitq,
	    CAST_EVENT64_T(sync), interruptible, TIMEOUT_WAIT_FOREVER);

	lck_spin_unlock(&sync->es_lock);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		wr = thread_block(THREAD_CONTINUE_NULL);
	}

	lck_spin_lock(&sync->es_lock);

	turnstile_complete((uintptr_t)sync, &sync->es_turnstile, NULL, tt);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC, MACH_EPOCH_SYNC_WAIT) |
	    DBG_FUNC_END, wr);

	/* Drops the lock, refcount and possibly frees sync. */
	esync_put(ht, sync, to_be_freed);

	return wr;
}
489 
/*
 * Wake up a waiter. Pre-posted wakes (wakes which happen when there is no
 * active waiter) just return. The epoch is always updated.
 * Returns KERN_SUCCESS if a thread was woken, KERN_NOT_WAITING otherwise.
 */
kern_return_t
esync_wake(esync_space_t space, const uint64_t id, const uint64_t epoch,
    os_atomic(uint64_t) *counter, const esync_wake_mode_t mode,
    const ctid_t ctid)
{
	assert3u(id, !=, ES_INVALID_ID);
	assert(
		mode == ESYNC_WAKE_ONE ||
		mode == ESYNC_WAKE_ALL ||
		mode == ESYNC_WAKE_ONE_WITH_OWNER ||
		mode == ESYNC_WAKE_THREAD);

	ht_t *ht = space_to_ht(space);
	const uint64_t unique_id = ES_UNIQUE_ID(space, id);

	kern_return_t kr = KERN_FAILURE;

	/*
	 * Update the epoch regardless of whether there's a waiter or not. (If
	 * there's no waiter, there will be no sync object).
	 * The epoch is read by waiters under the object lock to ensure that it
	 * doesn't miss a wake.
	 */
	(void) esync_update_epoch(epoch, counter);

	/* Returns the object locked, or NULL if no waiter ever inserted one. */
	esync_t *sync = ht_get(ht, unique_id);
	if (sync == NULL) {
		/* Drop pre-posted WAKEs. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_NO_WAITERS), unique_id,
		    epoch, mode);
		return KERN_NOT_WAITING;
	}
	LCK_SPIN_ASSERT(&sync->es_lock, LCK_ASSERT_OWNED);

	/* Hold the object across the wakeup; dropped in esync_put below. */
	os_ref_retain_locked(&sync->es_refcnt);

	assert(sync->es_policy == ESYNC_POLICY_KERNEL ||
	    sync->es_policy == ESYNC_POLICY_USER);
	turnstile_type_t tt = sync->es_policy == ESYNC_POLICY_KERNEL ?
	    TURNSTILE_EPOCH_KERNEL : TURNSTILE_EPOCH_USER;
	struct turnstile *ts = turnstile_prepare((uintptr_t)sync,
	    &sync->es_turnstile, TURNSTILE_NULL, tt);

	/* ctid and hence tid may be 0. */
	thread_t thread = ctid_get_thread(ctid);
	uint64_t tid = thread_tid(thread);

	switch (mode) {
	case ESYNC_WAKE_ONE:
		/* The woken thread is the new inheritor. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ONE), unique_id, epoch);
		kr = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
		break;

	case ESYNC_WAKE_ALL:
		/* The inheritor is cleared. */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ALL), unique_id, epoch);
		kr = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
		break;

	case ESYNC_WAKE_ONE_WITH_OWNER:
		/* The specified thread is the new inheritor (may be NULL). */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_ONE_WITH_OWNER), unique_id, epoch, tid);
		kr = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
		turnstile_update_inheritor(ts, thread,
		    TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
		break;

	case ESYNC_WAKE_THREAD:
		/* No new inheritor. Wake the specified thread (if waiting). */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EPOCH_SYNC,
		    MACH_EPOCH_SYNC_WAKE_THREAD), unique_id, epoch, tid);
		kr = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(sync),
		    thread, THREAD_AWAKENED);
		/* Last case - modes were validated by the assert above. */
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

	turnstile_complete((uintptr_t)sync, &sync->es_turnstile, NULL, tt);

	/* Drops the lock, refcount and possibly frees sync. */
	esync_put(ht, sync, NULL);

	assert(kr == KERN_SUCCESS || kr == KERN_NOT_WAITING);
	return kr;
}
587 
588 #if DEVELOPMENT || DEBUG
589 
590 #pragma mark - Tests -
591 
592 /* For SYSCTL_TEST_REGISTER. */
593 #include <kern/startup.h>
594 
595 /*
596  * Delay for a random amount up to ~1/2ms.
597  */
598 static void
random_delay(void)599 random_delay(void)
600 {
601 	extern void read_random(void* buffer, u_int numBytes);
602 	uint64_t random = 0;
603 	read_random(&random, sizeof(random));
604 	delay(random % 512);
605 }
606 
607 /*
608  * Basic mutex-like primitive to test the epoch synchronization primitives.
609  */
610 
/* Counter for the "client"-side (advanced by test_unlock). */
static os_atomic(uint64_t) client_counter = 0;

/* Counter for the "server"-side (the esync epoch counter). */
static os_atomic(uint64_t) server_counter = 0;

/*
 * The lock object stores 0 when not held and the thread's CTID when held.
 * If there's an active waiter, bit 32 is set.
 */
#define OWNER(x) ((x) & ((1ull << 32) - 1))
#define WAITER_BIT (1ull << 32)

/* The "mutex" itself. */
static uint64_t test_mutex;


/* Initialize the test hash table once the lock subsystem is up. */
STARTUP_ARG(LOCKS, STARTUP_RANK_LAST, ht_startup_init, &esync_test_ht);
629 
/*
 * Grab the lock.
 * If already held, set a waiters bit and call esync_wait.
 * On acquisition, if there are still waiters, set the waiters bit when taking
 * the lock.
 */
static void
test_lock(uint64_t *lock)
{
	/* Counter to keep track of the number of active waiters. */
	static os_atomic(uint32_t) test_waiter_count = 0;

	const ctid_t ctid = thread_get_ctid(current_thread());
	uint64_t old = 0;
	uint64_t new = ctid;

	while (true) {
		/* Try to grab the lock (0 means unowned). */
		if (os_atomic_cmpxchgv(lock, 0, new, &old, relaxed) == 1) {
			return;
		}

		/* Failed to grab the lock, add a waiter bit and wait. */
		do {
			/*
			 * Snapshot the epoch before publishing the waiter
			 * bit so a wake between here and esync_wait() renders
			 * the epoch stale rather than being missed.
			 */
			uint64_t epoch = os_atomic_load(&client_counter, acquire);

			if (os_atomic_cmpxchgv(lock, old, old | WAITER_BIT, &old, relaxed) == 1) {
				os_atomic_inc(&test_waiter_count, acq_rel);

				random_delay();
				const wait_result_t wr = esync_wait(ESYNC_SPACE_TEST, (uint32_t)(uintptr_t)lock, epoch,
				    &server_counter, OWNER(old), ESYNC_POLICY_KERNEL, THREAD_UNINT);
				assert(wr == THREAD_NOT_WAITING || wr == THREAD_AWAKENED);
				random_delay();

				/*
				 * When acquiring the lock, if there are waiters make sure to
				 * set the waiters bit.
				 */
				new = ctid;
				if (os_atomic_dec(&test_waiter_count, acq_rel) != 0) {
					new |= WAITER_BIT;
				}
				break;
			}
		} while (old != 0); /* Lock became free: retry the fast path. */
	}
}
678 
/*
 * Drop the lock.
 * Wakes a single waiter if the waiter bit was set.
 */
static void
test_unlock(uint64_t *lock)
{
	const ctid_t ctid = thread_get_ctid(current_thread());

	/* Drop the lock - clears both the owner and the waiter bit. */
	uint64_t old = os_atomic_xchg(lock, 0, relaxed);
	assert3u(OWNER(old), ==, ctid);

	/* Advance the client epoch; esync_wake propagates it to server_counter. */
	uint64_t epoch = os_atomic_inc(&client_counter, release);

	if ((old & WAITER_BIT) != 0) {
		random_delay();
		(void) esync_wake(ESYNC_SPACE_TEST, (uint32_t)(uintptr_t)lock, epoch,
		    &server_counter, ESYNC_WAKE_ONE, 0);
		random_delay();
	}
}
700 
701 
702 /* Count to keep track of completed test threads. */
703 static os_atomic(uint64_t) test_complete_count = 0;
704 
705 static void
test_lock_unlock(__unused void * arg,__unused int a)706 test_lock_unlock(__unused void *arg, __unused int a)
707 {
708 	for (int c = 0; c < 10; c++) {
709 		test_lock(&test_mutex);
710 		random_delay();
711 		test_unlock(&test_mutex);
712 	}
713 
714 	os_atomic_inc(&test_complete_count, relaxed);
715 }
716 
/* Serializes concurrent runs of esync_test so the counters aren't shared. */
static LCK_MTX_DECLARE(esync_test_mtx, &esync_lckgrp);

/*
 * Test: start 'count' threads hammering the test mutex (each waits and wakes
 * via the esync primitives), wait for them all to complete, then reset state.
 */
static int
esync_test(int64_t count, int64_t *out)
{
	kern_return_t ret;
	thread_t *thread = kalloc_type(thread_t, count,
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);

	printf("%s: STARTING\n", __func__);

	lck_mtx_lock(&esync_test_mtx);

	for (int64_t i = 0; i < count; i++) {
		ret = kernel_thread_start_priority(test_lock_unlock, NULL,
		    BASEPRI_DEFAULT, &thread[i]);
		assert3u(ret, ==, KERN_SUCCESS);
	}

	/* Wait for completion (polling is fine for test-only code). */
	while (test_complete_count != count) {
		delay(100000);
	}

	os_atomic_store(&test_complete_count, 0, relaxed);

	/* Drop the thread refs. */
	/*
	 * NOTE(review): index is 'int' while count is int64_t - harmless for
	 * realistic thread counts but inconsistent with the start loop above.
	 */
	for (int i = 0; i < count; i++) {
		thread_deallocate(thread[i]);
	}

	/* Reset the shared counters for the next run. */
	os_atomic_store(&server_counter, 0, relaxed);
	os_atomic_store(&client_counter, 0, relaxed);

	lck_mtx_unlock(&esync_test_mtx);

	printf("%s: SUCCESS\n", __func__);

	kfree_type(thread_t, count, thread);

	*out = 1;

	return 0;
}

SYSCTL_TEST_REGISTER(esync_test, esync_test);
764 
765 /*
766  * Block the caller on an interruptible wait. The thread must be terminated in
767  * order for this test to return.
768  */
769 static int
esync_test_wait(__unused int64_t in,__unused int64_t * out)770 esync_test_wait(__unused int64_t in, __unused int64_t *out)
771 {
772 	os_atomic(uint64_t) counter = 0;
773 
774 	printf("%s: STARTING\n", __func__);
775 
776 	wait_result_t wr = esync_wait(ESYNC_SPACE_TEST, 0, 0, &counter, 0,
777 	    ESYNC_POLICY_USER, THREAD_INTERRUPTIBLE);
778 	if (wr != THREAD_INTERRUPTED) {
779 		printf("%s: FAILURE - unexpected wait result (%d)\n", __func__, wr);
780 		*out = -1;
781 		return 0;
782 	}
783 
784 	printf("%s: SUCCESS\n", __func__);
785 
786 	*out = 1;
787 
788 	return 0;
789 }
790 
791 SYSCTL_TEST_REGISTER(esync_test_wait, esync_test_wait);
792 
793 #endif /* DEVELOPMENT  || DEBUG */
794