xref: /xnu-10063.121.3/osfmk/kern/restartable.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/task.h>

#include <kern/ast.h>
#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/restartable.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>

#include <os/atomic_private.h>
#include <os/hash.h>
#include <os/refcnt.h>

/**
 * @file osfmk/kern/restartable.c
 *
 * @brief
 * This module implements restartable userspace functions.
 *
 * @discussion
 * task_restartable_ranges_register() allows a task to register
 * its restartable ranges. This can be done only once per task,
 * and only before the task has created its second thread.
 *
 * task_restartable_ranges_synchronize() can later be used to trigger
 * restarts for threads with a PC in a restartable region.
 *
 * It is implemented with an AST (AST_RESET_PCS) that causes threads,
 * as they return to userspace, to reset any PC that lies within a
 * restartable region to the recovery address of that region.
 *
 * Because signal delivery would mask the proper saved PC for threads,
 * sigreturn also forcefully sets the AST and goes through this logic
 * every single time.
 */
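
/*
 * Illustrative userspace usage (a hedged sketch, not part of this file;
 * `_rr_start', `_rr_end' and `_rr_recover' are hypothetical labels
 * bracketing a restartable instruction sequence, with both the length
 * and the recovery offset below TASK_RESTARTABLE_OFFSET_MAX):
 *
 *	extern const char _rr_start[], _rr_end[], _rr_recover[];
 *	kern_return_t kr;
 *
 *	task_restartable_range_t range = {
 *		.location      = (mach_vm_address_t)_rr_start,
 *		.length        = _rr_end - _rr_start,
 *		.recovery_offs = _rr_recover - _rr_start,
 *	};
 *
 *	// once, while the process is still single threaded
 *	kr = task_restartable_ranges_register(mach_task_self(), &range, 1);
 *
 *	// later: any other thread currently executing inside the range
 *	// has its PC moved to `_rr_recover' before returning to userspace
 *	kr = task_restartable_ranges_synchronize(mach_task_self());
 */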

typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

#define RR_RANGES_MAX   64
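
/*
 * A content-hashed, refcounted, uniqued set of restartable ranges.
 * Entries are chained into rr_hash[] so that identical registrations
 * (e.g. processes sharing the same shared cache ranges) share a
 * single allocation.
 */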
struct restartable_ranges {
	queue_chain_t            rr_link;
	os_refcnt_t              rr_ref;
	uint32_t                 rr_count;
	uint32_t                 rr_hash;
	task_restartable_range_t rr_ranges[RR_RANGES_MAX];
};

#if DEBUG || DEVELOPMENT
#define RR_HASH_SIZE   256
#else
// Release kernel userspace should have shared caches and a single registration
#define RR_HASH_SIZE    16
#endif

static queue_head_t rr_hash[RR_HASH_SIZE];
LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);

#define rr_lock()   lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
#define rr_unlock() lck_spin_unlock(&rr_spinlock);

#pragma mark internals

/**
 * @function _ranges_cmp
 *
 * @brief
 * Compares two ranges, by location and then by length.
 */
static int
_ranges_cmp(const void *_r1, const void *_r2)
{
	const task_restartable_range_t *r1 = _r1;
	const task_restartable_range_t *r2 = _r2;

	if (r1->location != r2->location) {
		return r1->location < r2->location ? -1 : 1;
	}
	if (r1->length == r2->length) {
		return 0;
	}
	return r1->length < r2->length ? -1 : 1;
}

/**
 * @function _ranges_validate
 *
 * @brief
 * Validates an array of PC ranges for wraps and intersections.
 *
 * @discussion
 * This sorts and modifies the input.
 *
 * The ranges must:
 * - not wrap around,
 * - not overlap each other,
 * - have a length and recovery offset within a page of the range start.
 *
 * @returns
 * - KERN_SUCCESS:          ranges are valid
 * - KERN_INVALID_ARGUMENT: ranges are invalid
 */
static kern_return_t
_ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
{
	qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
	uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
	uint64_t end, recovery;

	if (count == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	for (size_t i = 0; i < count; i++) {
		if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
		    ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
			return KERN_INVALID_ARGUMENT;
		}
		if (ranges[i].flags) {
			return KERN_INVALID_ARGUMENT;
		}
		if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
			return KERN_INVALID_ARGUMENT;
		}
		if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
			return KERN_INVALID_ARGUMENT;
		}
		if (ranges[i].location > limit || end > limit || recovery > limit) {
			return KERN_INVALID_ARGUMENT;
		}
		if (i + 1 < count && end > ranges[i + 1].location) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	return KERN_SUCCESS;
}

/**
 * @function _ranges_lookup
 *
 * @brief
 * Looks up the range containing a given PC within a set of ranges.
 *
 * @returns
 * - 0: no range contains this PC
 * - the recovery address (range start + recovery offset) otherwise.
 */
__attribute__((always_inline))
static mach_vm_address_t
_ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
{
	task_restartable_range_t *ranges = rr->rr_ranges;
	uint32_t l = 0, r = rr->rr_count;

	if (pc <= ranges[0].location) {
		return 0;
	}
	if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
		return 0;
	}

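	/*
	 * Binary search for a range strictly containing `pc`:
	 * ranges below `l` end at or before `pc`,
	 * ranges at or above `r` start at or after `pc`.
	 */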
	while (l < r) {
		uint32_t i = (r + l) / 2;
		mach_vm_address_t location = ranges[i].location;

		if (pc <= location) {
			/* if the PC is exactly at pc_start, no reset is needed */
			r = i;
		} else if (location + ranges[i].length <= pc) {
			/* if the PC is exactly at the end, it's out of the function */
			l = i + 1;
		} else {
			/* else it's strictly in the range, return the recovery pc */
			return location + ranges[i].recovery_offs;
		}
	}

	return 0;
}

/**
 * @function _restartable_ranges_dispose
 *
 * @brief
 * Helper to dispose of a range that has reached a 0 refcount.
 */
__attribute__((noinline))
static void
_restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
{
	if (hash_remove) {
		rr_lock();
		remqueue(&rr->rr_link);
		rr_unlock();
	}
	kfree_type(struct restartable_ranges, rr);
}

/**
 * @function _restartable_ranges_equals
 *
 * @brief
 * Helper to compare two restartable ranges.
 */
static bool
_restartable_ranges_equals(
	const struct restartable_ranges *rr1,
	const struct restartable_ranges *rr2)
{
	size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
	return rr1->rr_hash == rr2->rr_hash &&
	       rr1->rr_count == rr2->rr_count &&
	       memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
}

/**
 * @function _restartable_ranges_create
 *
 * @brief
 * Helper to create a uniqued restartable range.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
 * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
 */
static kern_return_t
_restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
    uint32_t count, struct restartable_ranges **rr_storage)
{
	struct restartable_ranges *rr, *rr_found, *rr_base;
	queue_head_t *head;
	uint32_t base_count, total_count;
	size_t base_size, size;
	kern_return_t kr;

	rr_base = *rr_storage;
	base_count = rr_base ? rr_base->rr_count : 0;
	base_size = sizeof(task_restartable_range_t) * base_count;
	size = sizeof(task_restartable_range_t) * count;

	if (os_add_overflow(base_count, count, &total_count)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (total_count > RR_RANGES_MAX) {
		return KERN_RESOURCE_SHORTAGE;
	}

	rr = kalloc_type(struct restartable_ranges,
	    (zalloc_flags_t) (Z_WAITOK | Z_ZERO | Z_NOFAIL));

	queue_chain_init(rr->rr_link);
	os_ref_init(&rr->rr_ref, NULL);
	rr->rr_count = total_count;
	if (base_size) {
		memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
	}
	memcpy(rr->rr_ranges + base_count, ranges, size);
	kr = _ranges_validate(task, rr->rr_ranges, total_count);
	if (kr) {
		_restartable_ranges_dispose(rr, false);
		return kr;
	}
	rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
	    rr->rr_count * sizeof(task_restartable_range_t));

	head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];

	rr_lock();
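	/*
	 * Try to unique the new registration against an existing entry
	 * in the same hash bucket before publishing it.
	 */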
	queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
		if (_restartable_ranges_equals(rr, rr_found) &&
		    os_ref_retain_try(&rr_found->rr_ref)) {
			goto found;
		}
	}

	enqueue_tail(head, &rr->rr_link);
	rr_found = rr;

found:
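	/*
	 * Release the previous registration (if any): drop its reference
	 * and, if that was the last one, unhash it while the lock is held;
	 * the actual free happens below, outside of the spinlock.
	 */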
	if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
		remqueue(&rr_base->rr_link);
	} else {
		rr_base = NULL;
	}
	rr_unlock();

	*rr_storage = rr_found;

	if (rr_found != rr) {
		_restartable_ranges_dispose(rr, false);
	}
	if (rr_base) {
		_restartable_ranges_dispose(rr_base, false);
	}
	return KERN_SUCCESS;
}

#pragma mark extern interfaces

__attribute__((always_inline))
void
restartable_ranges_release(struct restartable_ranges *rr)
{
	if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
		_restartable_ranges_dispose(rr, true);
	}
}

__attribute__((always_inline))
void
thread_reset_pcs_will_fault(thread_t thread)
{
	/*
	 * Called in the exception handling code while interrupts
	 * are still disabled.
	 */
	os_atomic_store(&thread->t_rr_state.trr_fault_state,
	    (uint8_t)TRR_FAULT_PENDING, relaxed);
}

__attribute__((always_inline))
void
thread_reset_pcs_done_faulting(struct thread *thread)
{
	thread_rr_state_t state = {
		.trr_ipi_ack_pending = ~0,
	};

	/*
	 * Called by the exception handling code on the way back,
	 * or when the thread is terminated.
	 */
	state.trr_value = os_atomic_and_orig(&thread->t_rr_state.trr_value,
	    state.trr_value, relaxed);

	if (__improbable(state.trr_sync_waiting)) {
		task_t task = get_threadtask(thread);

		task_lock(task);
		wakeup_all_with_inheritor(&thread->t_rr_state, THREAD_AWAKENED);
		task_unlock(task);
	}
}

void
thread_reset_pcs_ack_IPI(struct thread *thread)
{
	thread_rr_state_t trrs;

	/*
	 * Called under the thread lock from IPI or CSwitch context.
	 */
	trrs.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	if (__improbable(trrs.trr_ipi_ack_pending)) {
		trrs.trr_ipi_ack_pending = false;
		if (trrs.trr_fault_state) {
			assert3u(trrs.trr_fault_state, ==, TRR_FAULT_PENDING);
			trrs.trr_fault_state = TRR_FAULT_OBSERVED;
		}
		os_atomic_store(&thread->t_rr_state.trr_value,
		    trrs.trr_value, relaxed);
	}
}

static bool
thread_rr_wait_if_needed(task_t task, thread_t thread)
{
	thread_rr_state_t state;
	bool did_unlock = false;

	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	if (state.trr_value == 0) {
		return did_unlock;
	}

	assert(state.trr_sync_waiting == 0);

	thread_reference(thread);

	/*
	 * The thread_rr_state state machine is:
	 *
	 *                        ,------------ IPI ack --------------.
	 *                        v                                   |
	 *        .-----> {f:N, w:0, ipi:0} --- IPI sent ---> {f:N, w:0, ipi:1}
	 *        |           |        ^                              |
	 *        |           |        |                              |
	 *      fault       will     fault                          will
	 *      done        fault    done                           fault
	 *        |           |        |                              |
	 *        |           v        |                              v
	 *        |       {f:P, w:0, ipi:0} --- IPI sent ---> {f:P, w:0, ipi:1}
	 *        |               |                                   |
	 *        |               |                                   |
	 *        |     act_set_ast_reset_pcs()                       |
	 *        |               |                                   |
	 *        |               v                                   |
	 *        +------ {f:O, w:0, ipi:0} <--- IPI Ack -------------'
	 *        |               |
	 *        |               |
	 *        |        wait_if_needed()
	 *        |               |
	 *        |               v
	 *        `------ {f:O, w:1, ipi:0}
	 */

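	/*
	 * Drop the task lock and spin until the target thread (or the
	 * context switch path) acknowledges the IPI.
	 */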
	while (state.trr_ipi_ack_pending) {
		disable_preemption();
		task_unlock(task);

		state.trr_value =
		    hw_wait_while_equals32(&thread->t_rr_state.trr_value,
		    state.trr_value);

		enable_preemption();
		task_lock(task);

		did_unlock = true;
	}

	/*
	 * If a VM fault is in flight we must wait for it to resolve
	 * before we can return from task_restartable_ranges_synchronize(),
	 * as the memory we're faulting against might be freed by the caller
	 * as soon as it returns, leading to a crash.
	 */
	if (state.trr_fault_state == TRR_FAULT_OBSERVED) {
		thread_rr_state_t nstate = {
			.trr_fault_state  = TRR_FAULT_OBSERVED,
			.trr_sync_waiting = 1,
		};

		if (os_atomic_cmpxchg(&thread->t_rr_state, state,
		    nstate, relaxed)) {
			lck_mtx_sleep_with_inheritor(&task->lock,
			    LCK_SLEEP_DEFAULT, &thread->t_rr_state,
			    thread, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
			did_unlock = true;
		}
	}

#if MACH_ASSERT
	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	assert3u(state.trr_fault_state, !=, TRR_FAULT_OBSERVED);
	assert3u(state.trr_ipi_ack_pending, ==, 0);
	assert3u(state.trr_sync_waiting, ==, 0);
#endif

	thread_deallocate_safe(thread);
	return did_unlock;
}

bool
thread_reset_pcs_in_range(task_t task, thread_t thread)
{
	return _ranges_lookup(task->t_rr_ranges, machine_thread_pc(thread)) != 0;
}

void
thread_reset_pcs_ast(task_t task, thread_t thread)
{
	struct restartable_ranges *rr;
	mach_vm_address_t pc;

	/*
	 * Because restartable_ranges are set while the task only has one thread,
	 * and can't be mutated outside of that, no lock is required to read them.
	 */
	rr = task->t_rr_ranges;
	if (thread->active && rr) {
		pc = _ranges_lookup(rr, machine_thread_pc(thread));

		if (pc) {
			machine_thread_reset_pc(thread, pc);
		}
	}

#if MACH_ASSERT
	thread_rr_state_t state;

	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	assert3u(state.trr_fault_state, ==, TRR_FAULT_NONE);
	assert3u(state.trr_sync_waiting, ==, 0);
#endif
}

void
restartable_init(void)
{
	for (size_t i = 0; i < RR_HASH_SIZE; i++) {
		queue_head_init(rr_hash[i]);
	}
}

#pragma mark MiG interfaces

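/**
 * @function task_restartable_ranges_register
 *
 * @brief
 * MiG entry point: registers the restartable ranges of the current task.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_FAILURE:           task isn't the current task
 * - KERN_INVALID_ARGUMENT:  the ranges failed validation
 * - KERN_NOT_SUPPORTED:     the task already has more than one thread,
 *                           or already registered ranges (release kernels)
 * - KERN_RESOURCE_SHORTAGE: too many ranges, or the task is translated
 */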
kern_return_t
task_restartable_ranges_register(
	task_t                    task,
	task_restartable_range_t *ranges,
	mach_msg_type_number_t    count)
{
	kern_return_t kr;

	if (task != current_task()) {
		return KERN_FAILURE;
	}

#if CONFIG_ROSETTA
	// <rdar://problem/48527888> Obj-C adoption of task_restartable_ranges_register breaks Cambria
	if (task_is_translated(task)) {
		return KERN_RESOURCE_SHORTAGE;
	}
#endif

	kr = _ranges_validate(task, ranges, count);

	if (kr == KERN_SUCCESS) {
		task_lock(task);

		if (task->thread_count > 1) {
			kr = KERN_NOT_SUPPORTED;
#if !DEBUG && !DEVELOPMENT
		} else if (task->t_rr_ranges) {
			/*
			 * For security reasons, on release kernels,
			 * only allow this to be configured once.
			 *
			 * But to be able to test the feature we need
			 * to relax this for dev kernels.
			 */
			kr = KERN_NOT_SUPPORTED;
#endif
		} else {
			kr = _restartable_ranges_create(task, ranges, count,
			    &task->t_rr_ranges);
		}

		task_unlock(task);
	}

	return kr;
}

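/**
 * @function task_restartable_ranges_synchronize
 *
 * @brief
 * MiG entry point: forces every other thread of the current task whose PC
 * is inside a registered range to restart at the recovery address when it
 * next returns to userspace, and waits for the needed acknowledgements.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_FAILURE:         task isn't the current task
 * - KERN_ALREADY_WAITING: a synchronize call is already in flight
 */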
kern_return_t
task_restartable_ranges_synchronize(task_t task)
{
	thread_pri_floor_t token;
	thread_t thread;
	bool needs_wait = false;
	kern_return_t kr = KERN_SUCCESS;

	if (task != current_task()) {
		return KERN_FAILURE;
	}

	/*
	 * t_rr_ranges can only be set if the process is single threaded.
	 * As a result, `t_rr_ranges` can _always_ be looked at
	 * from current_thread() without holding a lock:
	 * - either because it's the only thread in the task
	 * - or because the existence of another thread precludes
	 *   modification
	 */
	if (!task->t_rr_ranges) {
		return KERN_SUCCESS;
	}

	/*
	 * When initiating a GC, artificially raise the priority for the
	 * duration of sending ASTs: we want to be preemptible, but this
	 * sequence has to terminate in a timely fashion.
	 */
	token = thread_priority_floor_start();

	task_lock(task);

	/*
	 * In order to avoid trivial deadlocks of 2 threads trying
	 * to wait on each other while in kernel, disallow
	 * concurrent usage of task_restartable_ranges_synchronize().
	 *
	 * At the time this code was written, the one client (Objective-C)
	 * does this under lock which guarantees ordering. If we ever need
	 * more clients, the library around restartable ranges will have
	 * to synchronize in userspace.
	 */
	if (task->task_rr_in_flight) {
		kr = KERN_ALREADY_WAITING;
		goto out;
	}

	task->task_rr_in_flight = true;

	/*
	 * Pair with the acquire barriers handling RR_TSTATE_ONCORE.
	 *
	 * For threads that weren't on core, we rely on the fact
	 * that we are taking their lock in act_set_ast_reset_pcs()
	 * and that the context switch path will also take it before
	 * resuming them, which provides the required ordering.
	 *
	 * For threads that don't exist yet, the task_lock() is taken
	 * to add them to the task thread list, which also synchronizes
	 * with this code.
	 */
	os_atomic_thread_fence(release);

	/*
	 * Set all the AST_RESET_PCS, and see if any thread needs
	 * actual acknowledgement.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != current_thread()) {
			needs_wait |= act_set_ast_reset_pcs(task, thread);
		}
	}

	/*
	 * Now wait for acknowledgement if we need any
	 */
	while (needs_wait) {
		needs_wait = false;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread == current_thread()) {
				continue;
			}

			needs_wait = thread_rr_wait_if_needed(task, thread);
			if (needs_wait) {
				/*
				 * We dropped the task lock,
				 * so we need to restart enumerating threads.
				 */
				break;
			}
		}
	}

	task->task_rr_in_flight = false;

out:
	task_unlock(task);

	thread_priority_floor_end(&token);

	return kr;
}
687