xref: /xnu-8020.121.3/osfmk/kern/thread_call.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41 
42 #include <vm/vm_pageout.h>
43 
44 #include <kern/thread_call.h>
45 #include <kern/timer_call.h>
46 
47 #include <libkern/OSAtomic.h>
48 #include <kern/timer_queue.h>
49 
50 #include <sys/kdebug.h>
51 #if CONFIG_DTRACE
52 #include <mach/sdt.h>
53 #endif
54 #include <machine/machine_routines.h>
55 
/* Zone backing dynamically allocated thread calls; elements are zeroed on free. */
static ZONE_DEFINE_TYPE(thread_call_zone, "thread_call",
    thread_call_data_t, ZC_ZFREE_CLEARMEM);

/*
 * Timebase "flavor" a delayed call is armed against: absolute mach time
 * or continuous time (tracked via THREAD_CALL_FLAG_CONTINUOUS on the call).
 */
typedef enum {
	TCF_ABSOLUTE    = 0,
	TCF_CONTINUOUS  = 1,
	TCF_COUNT       = 2,
} thread_call_flavor_t;

/* Per-group flag bits. */
__options_decl(thread_call_group_flags_t, uint32_t, {
	TCG_NONE                = 0x0,
	TCG_PARALLEL            = 0x1,  /* group may run several calls concurrently */
	TCG_DEALLOC_ACTIVE      = 0x2,  /* NOTE(review): appears to mark an armed dealloc_timer — confirm against timer code */
});
70 
/*
 * A thread call group is a pool of worker threads at one priority band,
 * together with the pending and delayed work queued for those threads.
 * Groups are statically allocated, one per THREAD_CALL_INDEX_* value.
 */
static struct thread_call_group {
	/* 128-byte aligned so each group's lock sits on its own cache line(s). */
	__attribute__((aligned(128))) lck_ticket_t tcg_lock;

	const char *            tcg_name;       /* used in worker-thread names and panic text */

	queue_head_t            pending_queue;  /* calls ready to execute now */
	uint32_t                pending_count;

	/* Delayed calls, per flavor: FIFO list paired with a deadline-min pqueue. */
	queue_head_t            delayed_queues[TCF_COUNT];
	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
	timer_call_data_t       delayed_timers[TCF_COUNT];

	timer_call_data_t       dealloc_timer;

	struct waitq            idle_waitq;     /* where idle worker threads park */
	uint64_t                idle_timestamp;
	uint32_t                idle_count, active_count, blocked_count;

	uint32_t                tcg_thread_pri;         /* scheduling priority for workers */
	uint32_t                target_thread_count;    /* preferred pool size */

	thread_call_group_flags_t tcg_flags;

	struct waitq            waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
	[THREAD_CALL_INDEX_HIGH] = {
		.tcg_name               = "high",
		.tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
		.target_thread_count    = 4,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_KERNEL] = {
		.tcg_name               = "kernel",
		.tcg_thread_pri         = BASEPRI_KERNEL,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_USER] = {
		.tcg_name               = "user",
		.tcg_thread_pri         = BASEPRI_DEFAULT,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_LOW] = {
		.tcg_name               = "low",
		.tcg_thread_pri         = MAXPRI_THROTTLE,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
		.tcg_name               = "kernel-high",
		.tcg_thread_pri         = BASEPRI_PREEMPT,
		.target_thread_count    = 2,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UI] = {
		.tcg_name               = "qos-ui",
		.tcg_thread_pri         = BASEPRI_FOREGROUND,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_IN] = {
		.tcg_name               = "qos-in",
		.tcg_thread_pri         = BASEPRI_USER_INITIATED,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UT] = {
		.tcg_name               = "qos-ut",
		.tcg_thread_pri         = BASEPRI_UTILITY,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
};
145 
typedef struct thread_call_group        *thread_call_group_t;

#define INTERNAL_CALL_COUNT             768     /* size of the preallocated internal-call pool */
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO           4       /* pending calls per active thread before growing the pool */
#define THREAD_CALL_MACH_FACTOR_CAP     3       /* suppress pool growth when system load exceeds this */
#define THREAD_CALL_GROUP_MAX_THREADS   500     /* hard cap on total threads per group; exceeded => panic */

/* Per-worker-thread record of the invocation in flight. */
struct thread_call_thread_state {
	struct thread_call_group * thc_group;
	struct thread_call *       thc_call;    /* debug only, may be deallocated */
	uint64_t thc_call_start;
	uint64_t thc_call_soft_deadline;
	uint64_t thc_call_hard_deadline;
	uint64_t thc_call_pending_timestamp;
	uint64_t thc_IOTES_invocation_timestamp;
	thread_call_func_t  thc_func;
	thread_call_param_t thc_param0;
	thread_call_param_t thc_param1;
};

static bool                     thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq             daemon_waitq;

/* Preallocated storage backing function-only (non-zone-allocated) thread calls. */
static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t             thread_call_internal_queue;
int                                             thread_call_internal_queue_count = 0;
static uint64_t                 thread_call_dealloc_interval_abs;

/* Forward declarations for the module-internal machinery defined below. */
static void                     _internal_call_init(void);

static thread_call_t            _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool                     _is_internal_call(thread_call_t call);
static void                     _internal_call_release(thread_call_t call);
static bool                     _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool                     _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool                     _call_dequeue(thread_call_t call, thread_call_group_t group);
static void                     thread_call_wake(thread_call_group_t group);
static void                     thread_call_daemon(void *arg);
static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void                     thread_call_group_setup(thread_call_group_t group);
static void                     sched_call_thread(int type, thread_t thread);
static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
static void                     thread_call_wait_locked(thread_call_t call, spl_t s);
static bool                     thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
207 
208 
/* Acquire the group's ticket lock. */
static void
thread_call_lock_spin(thread_call_group_t group)
{
	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}

/* Release the group's ticket lock. */
static void
thread_call_unlock(thread_call_group_t group)
{
	lck_ticket_unlock(&group->tcg_lock);
}

/* Assert (debug builds) that the caller owns the group lock. */
static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
	lck_ticket_assert_owned(&group->tcg_lock);
}


/* Raise to splsched and take the group lock; returns the prior spl for splx(). */
static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
	spl_t s = splsched();
	thread_call_lock_spin(group);

	return s;
}

/* Drop the group lock and restore the spl saved by disable_ints_and_lock(). */
static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
	thread_call_unlock(group);
	splx(s);
}
243 
244 /* Lock held */
245 static thread_call_group_t
thread_call_get_group(thread_call_t call)246 thread_call_get_group(thread_call_t call)
247 {
248 	thread_call_index_t index = call->tc_index;
249 
250 	assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);
251 
252 	return &thread_call_groups[index];
253 }
254 
255 /* Lock held */
256 static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)257 thread_call_get_flavor(thread_call_t call)
258 {
259 	return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
260 }
261 
262 /* Lock held */
263 static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call,thread_call_flavor_t flavor)264 thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
265 {
266 	assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
267 	thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
268 
269 	if (old_flavor != flavor) {
270 		if (flavor == TCF_CONTINUOUS) {
271 			call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
272 		} else {
273 			call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
274 		}
275 	}
276 
277 	return old_flavor;
278 }
279 
/*
 * Move a call onto new_queue (the pending queue or a delayed queue).
 *
 * The call's only legal prior locations are "no queue" or the delayed
 * queue matching its current flavor; anything else panics.  Leaving the
 * delayed queue also unlinks the paired deadline priority-queue entry.
 *
 * Returns true if the call was already on a queue.
 * Called with thread_call_lock held.
 */
static bool
thread_call_enqueue_tail(
	thread_call_t           call,
	queue_t                 new_queue)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	/* Leaving the delayed queue: drop the paired pqueue linkage too. */
	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue != NULL;
}
310 
/*
 * Unlink a call from whichever group queue it occupies (pending, or the
 * delayed queue matching its flavor), including the paired deadline
 * pqueue entry when it was delayed.
 *
 * Returns the queue the call was removed from, or NULL if it was idle.
 * Called with thread_call_lock held.
 */
static queue_head_t *
thread_call_dequeue(
	thread_call_t            call)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	/* Anything other than pending / this-flavor delayed indicates corruption. */
	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue != NULL) {
		remqueue(&call->tc_qlink);

		call->tc_queue = NULL;
	}
	return old_queue;
}
337 
/*
 * (Re-)insert a call on the delayed queue for `flavor` with the given
 * deadline, keeping the paired deadline-min priority queue in sync, and
 * switching the call's flavor flag to match.
 *
 * The call may arrive from nowhere (idle), the pending queue, or either
 * flavor's delayed queue; any other origin panics.
 *
 * Returns the queue the call was previously on (NULL if idle).
 * Called with thread_call_lock held.
 */
static queue_head_t *
thread_call_enqueue_deadline(
	thread_call_t           call,
	thread_call_group_t     group,
	thread_call_flavor_t    flavor,
	uint64_t                deadline)
{
	queue_t old_queue = call->tc_queue;
	queue_t new_queue = &group->delayed_queues[flavor];

	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[old_flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == new_queue) {
		/* optimize the same-queue case to avoid a full re-insert */
		uint64_t old_deadline = call->tc_pqlink.deadline;
		call->tc_pqlink.deadline = deadline;

		/* Fix up the heap in the direction the deadline moved. */
		if (old_deadline < deadline) {
			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		} else {
			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		}
	} else {
		/* Leaving the other flavor's delayed queue: unlink from its heap. */
		if (old_queue == &group->delayed_queues[old_flavor]) {
			priority_queue_remove(&group->delayed_pqueues[old_flavor],
			    &call->tc_pqlink);
		}

		call->tc_pqlink.deadline = deadline;

		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else if (old_queue != new_queue) {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue;
}
389 
/* Report the deadline this call is currently armed with (tc_pqlink.deadline). */
uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
	return call->tc_pqlink.deadline;
}
395 
396 
/* True if the group may run more than one call concurrently (TCG_PARALLEL). */
static bool
group_isparallel(thread_call_group_t group)
{
	return (group->tcg_flags & TCG_PARALLEL) != 0;
}
402 
403 static bool
thread_call_group_should_add_thread(thread_call_group_t group)404 thread_call_group_should_add_thread(thread_call_group_t group)
405 {
406 	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
407 		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
408 		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
409 		    group->active_count, group->blocked_count, group->idle_count);
410 	}
411 
412 	if (group_isparallel(group) == false) {
413 		if (group->pending_count > 0 && group->active_count == 0) {
414 			return true;
415 		}
416 
417 		return false;
418 	}
419 
420 	if (group->pending_count > 0) {
421 		if (group->idle_count > 0) {
422 			return false;
423 		}
424 
425 		uint32_t thread_count = group->active_count;
426 
427 		/*
428 		 * Add a thread if either there are no threads,
429 		 * the group has fewer than its target number of
430 		 * threads, or the amount of work is large relative
431 		 * to the number of threads.  In the last case, pay attention
432 		 * to the total load on the system, and back off if
433 		 * it's high.
434 		 */
435 		if ((thread_count == 0) ||
436 		    (thread_count < group->target_thread_count) ||
437 		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
438 		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
439 			return true;
440 		}
441 	}
442 
443 	return false;
444 }
445 
446 static void
thread_call_group_setup(thread_call_group_t group)447 thread_call_group_setup(thread_call_group_t group)
448 {
449 	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);
450 
451 	queue_init(&group->pending_queue);
452 
453 	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
454 		queue_init(&group->delayed_queues[flavor]);
455 		priority_queue_init(&group->delayed_pqueues[flavor]);
456 		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
457 	}
458 
459 	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
460 
461 	waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
462 
463 	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
464 	waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
465 }
466 
467 /*
468  * Simple wrapper for creating threads bound to
469  * thread call groups.
470  */
471 static void
thread_call_thread_create(thread_call_group_t group)472 thread_call_thread_create(
473 	thread_call_group_t             group)
474 {
475 	thread_t thread;
476 	kern_return_t result;
477 
478 	int thread_pri = group->tcg_thread_pri;
479 
480 	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
481 	    group, thread_pri, &thread);
482 	if (result != KERN_SUCCESS) {
483 		panic("cannot create new thread call thread %d", result);
484 	}
485 
486 	if (thread_pri <= BASEPRI_KERNEL) {
487 		/*
488 		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
489 		 * in kernel if there are higher priority threads available.
490 		 */
491 		thread_set_eager_preempt(thread);
492 	}
493 
494 	char name[MAXTHREADNAMESIZE] = "";
495 
496 	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
497 
498 	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
499 	thread_set_thread_name(thread, name);
500 
501 	thread_deallocate(thread);
502 }
503 
504 /*
505  *	thread_call_initialize:
506  *
507  *	Initialize this module, called
508  *	early during system initialization.
509  */
510 __startup_func
511 static void
thread_call_initialize(void)512 thread_call_initialize(void)
513 {
514 	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
515 	waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
516 
517 	for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
518 		thread_call_group_setup(&thread_call_groups[i]);
519 	}
520 
521 	_internal_call_init();
522 
523 	thread_t thread;
524 	kern_return_t result;
525 
526 	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
527 	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
528 	if (result != KERN_SUCCESS) {
529 		panic("thread_call_initialize");
530 	}
531 
532 	thread_deallocate(thread);
533 }
534 STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);
535 
536 void
thread_call_setup_with_options(thread_call_t call,thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)537 thread_call_setup_with_options(
538 	thread_call_t                   call,
539 	thread_call_func_t              func,
540 	thread_call_param_t             param0,
541 	thread_call_priority_t          pri,
542 	thread_call_options_t           options)
543 {
544 	bzero(call, sizeof(*call));
545 
546 	*call = (struct thread_call) {
547 		.tc_func = func,
548 		.tc_param0 = param0,
549 	};
550 
551 	switch (pri) {
552 	case THREAD_CALL_PRIORITY_HIGH:
553 		call->tc_index = THREAD_CALL_INDEX_HIGH;
554 		break;
555 	case THREAD_CALL_PRIORITY_KERNEL:
556 		call->tc_index = THREAD_CALL_INDEX_KERNEL;
557 		break;
558 	case THREAD_CALL_PRIORITY_USER:
559 		call->tc_index = THREAD_CALL_INDEX_USER;
560 		break;
561 	case THREAD_CALL_PRIORITY_LOW:
562 		call->tc_index = THREAD_CALL_INDEX_LOW;
563 		break;
564 	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
565 		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
566 		break;
567 	default:
568 		panic("Invalid thread call pri value: %d", pri);
569 		break;
570 	}
571 
572 	if (options & THREAD_CALL_OPTIONS_ONCE) {
573 		call->tc_flags |= THREAD_CALL_ONCE;
574 	}
575 	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
576 		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
577 	}
578 }
579 
/* Initialize a thread call with the default (HIGH) priority and no options. */
void
thread_call_setup(
	thread_call_t                   call,
	thread_call_func_t              func,
	thread_call_param_t             param0)
{
	thread_call_setup_with_options(call, func, param0,
	    THREAD_CALL_PRIORITY_HIGH, 0);
}
589 
590 static void
_internal_call_init(void)591 _internal_call_init(void)
592 {
593 	/* Function-only thread calls are only kept in the default HIGH group */
594 	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
595 
596 	spl_t s = disable_ints_and_lock(group);
597 
598 	queue_init(&thread_call_internal_queue);
599 
600 	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
601 		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
602 		thread_call_internal_queue_count++;
603 	}
604 
605 	enable_ints_and_unlock(group, s);
606 }
607 
608 /*
609  *	_internal_call_allocate:
610  *
611  *	Allocate an internal callout entry.
612  *
613  *	Called with thread_call_lock held.
614  */
615 static thread_call_t
_internal_call_allocate(thread_call_func_t func,thread_call_param_t param0)616 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
617 {
618 	/* Function-only thread calls are only kept in the default HIGH group */
619 	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
620 
621 	spl_t s = disable_ints_and_lock(group);
622 
623 	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
624 	    struct thread_call, tc_qlink);
625 
626 	if (call == NULL) {
627 		panic("_internal_call_allocate: thread_call_internal_queue empty");
628 	}
629 
630 	thread_call_internal_queue_count--;
631 
632 	thread_call_setup(call, func, param0);
633 	/* THREAD_CALL_ALLOC not set, do not free back to zone */
634 	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
635 	enable_ints_and_unlock(group, s);
636 
637 	return call;
638 }
639 
640 /* Check if a call is internal and needs to be returned to the internal pool. */
641 static bool
_is_internal_call(thread_call_t call)642 _is_internal_call(thread_call_t call)
643 {
644 	if (call >= internal_call_storage &&
645 	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
646 		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
647 		return true;
648 	}
649 	return false;
650 }
651 
652 /*
653  *	_internal_call_release:
654  *
655  *	Release an internal callout entry which
656  *	is no longer pending (or delayed).
657  *
658  *      Called with thread_call_lock held.
659  */
660 static void
_internal_call_release(thread_call_t call)661 _internal_call_release(thread_call_t call)
662 {
663 	assert(_is_internal_call(call));
664 
665 	thread_call_group_t group = thread_call_get_group(call);
666 
667 	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
668 	thread_call_assert_locked(group);
669 
670 	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
671 	thread_call_internal_queue_count++;
672 }
673 
/*
 *	_pending_call_enqueue:
 *
 *	Place an entry at the end of the
 *	pending queue, to be executed soon.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
	/*
	 * A ONCE call that is currently executing cannot be queued again;
	 * instead record that it should be re-dispatched when the in-flight
	 * invocation completes.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = 0;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);

		/* report "already queued" if a reschedule was pending before this one */
		return flags & THREAD_CALL_RESCHEDULE;
	}

	call->tc_pending_timestamp = now;

	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

	if (!was_on_queue) {
		call->tc_submit_count++;
	}

	group->pending_count++;

	thread_call_wake(group);

	return was_on_queue;
}
716 
/*
 *	_delayed_call_enqueue:
 *
 *	Place an entry on the delayed queue,
 *	after existing entries with an earlier
 *      (or identical) deadline.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
	thread_call_t           call,
	thread_call_group_t     group,
	uint64_t                deadline,
	thread_call_flavor_t    flavor)
{
	/*
	 * A ONCE call that is currently executing can't be re-queued yet:
	 * record the new deadline/flavor and mark it for reschedule when
	 * the in-flight invocation completes.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = deadline;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);
		thread_call_set_flavor(call, flavor);

		/* report "already queued" if a reschedule was pending before this one */
		return flags & THREAD_CALL_RESCHEDULE;
	}

	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	} else if (old_queue == NULL) {
		/* newly submitted, not a re-arm of an already-queued call */
		call->tc_submit_count++;
	}

	return old_queue != NULL;
}
759 
/*
 *	_call_dequeue:
 *
 *	Remove an entry from a queue.
 *
 *	Returns TRUE if the entry was on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_call_dequeue(
	thread_call_t           call,
	thread_call_group_t     group)
{
	queue_head_t *old_queue = thread_call_dequeue(call);

	if (old_queue == NULL) {
		return false;
	}

	/* bump finish count (pairs with the tc_submit_count bump on enqueue) */
	call->tc_finish_count++;

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	}

	return true;
}
788 
/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t           new_call,
    thread_call_group_t     group,
    thread_call_flavor_t    flavor)
{
	/* No calls implies no timer needed */
	if (queue_empty(&group->delayed_queues[flavor])) {
		return false;
	}

	/* Earliest-deadline call for this flavor, from the paired pqueue. */
	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

	/* We only need to change the hard timer if this new call is the first in the list */
	if (new_call != NULL && new_call != call) {
		return false;
	}

	/* soft deadline must be set and never later than the hard deadline */
	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

	uint64_t fire_at = call->tc_soft_deadline;

	if (flavor == TCF_CONTINUOUS) {
		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
		/* convert: the backing timer_call is armed on the absolute timebase */
		fire_at = continuoustime_to_absolutetime(fire_at);
	} else {
		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
	}

	/*
	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
	 * which does not take into account later-deadline timers with a larger leeway.
	 * This is a valid coalescing behavior, but masks a possible window to
	 * fire a timer instead of going idle.
	 */
	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
	    fire_at, leeway,
	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

	return true;
}
846 
847 /*
848  *	_cancel_func_from_queue:
849  *
850  *	Remove the first (or all) matching
851  *	entries from the specified queue.
852  *
853  *	Returns TRUE if any matching entries
854  *	were found.
855  *
856  *	Called with thread_call_lock held.
857  */
858 static boolean_t
_cancel_func_from_queue(thread_call_func_t func,thread_call_param_t param0,thread_call_group_t group,boolean_t remove_all,queue_head_t * queue)859 _cancel_func_from_queue(thread_call_func_t      func,
860     thread_call_param_t     param0,
861     thread_call_group_t     group,
862     boolean_t               remove_all,
863     queue_head_t            *queue)
864 {
865 	boolean_t call_removed = FALSE;
866 	thread_call_t call;
867 
868 	qe_foreach_element_safe(call, queue, tc_qlink) {
869 		if (call->tc_func != func ||
870 		    call->tc_param0 != param0) {
871 			continue;
872 		}
873 
874 		_call_dequeue(call, group);
875 
876 		if (_is_internal_call(call)) {
877 			_internal_call_release(call);
878 		}
879 
880 		call_removed = TRUE;
881 		if (!remove_all) {
882 			break;
883 		}
884 	}
885 
886 	return call_removed;
887 }
888 
/*
 *	thread_call_func_delayed:
 *
 *	Enqueue a function callout to
 *	occur at the stated time.
 *	(Uses an internal pool entry; no leeway, no flags.)
 */
void
thread_call_func_delayed(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                        deadline)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}
903 
/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */

void
thread_call_func_delayed_with_leeway(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                deadline,
	uint64_t                leeway,
	uint32_t                flags)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}
921 
/*
 *	thread_call_func_cancel:
 *
 *	Dequeue a function callout.
 *
 *	Removes one (or all) { function, argument }
 *	instance(s) from either (or both)
 *	the pending and	the delayed queue,
 *	in that order.
 *
 *	Returns TRUE if any calls were cancelled.
 *
 *	This iterates all of the pending or delayed thread calls in the group,
 *	which is really inefficient.  Switch to an allocated thread call instead.
 *
 *	TODO: Give 'func' thread calls their own group, so this silliness doesn't
 *	affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
	thread_call_func_t              func,
	thread_call_param_t             param,
	boolean_t                       cancel_all)
{
	boolean_t       result;

	assert(func != NULL);

	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	/* Hold the group lock (interrupts off) across the whole scan */
	spl_t s = disable_ints_and_lock(group);

	if (cancel_all) {
		/*
		 * exhaustively search every queue, and return true if any search found something
		 * NB: bitwise '|' is deliberate — it avoids short-circuit so all
		 * three queues are scanned even after an early hit
		 */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE])  |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	} else {
		/* early-exit as soon as we find something, don't search other queues */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	}

	enable_ints_and_unlock(group, s);

	return result;
}
971 
972 /*
973  * Allocate a thread call with a given priority.  Importances other than
974  * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
975  * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
976  * threads which are not in the normal "urgent" bands).
977  */
978 thread_call_t
thread_call_allocate_with_priority(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri)979 thread_call_allocate_with_priority(
980 	thread_call_func_t              func,
981 	thread_call_param_t             param0,
982 	thread_call_priority_t          pri)
983 {
984 	return thread_call_allocate_with_options(func, param0, pri, 0);
985 }
986 
987 thread_call_t
thread_call_allocate_with_options(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)988 thread_call_allocate_with_options(
989 	thread_call_func_t              func,
990 	thread_call_param_t             param0,
991 	thread_call_priority_t          pri,
992 	thread_call_options_t           options)
993 {
994 	thread_call_t call = zalloc(thread_call_zone);
995 
996 	thread_call_setup_with_options(call, func, param0, pri, options);
997 	call->tc_refs = 1;
998 	call->tc_flags |= THREAD_CALL_ALLOC;
999 
1000 	return call;
1001 }
1002 
1003 thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,thread_call_param_t param0,int qos_tier,thread_call_options_t options)1004 thread_call_allocate_with_qos(thread_call_func_t        func,
1005     thread_call_param_t       param0,
1006     int                       qos_tier,
1007     thread_call_options_t     options)
1008 {
1009 	thread_call_t call = thread_call_allocate(func, param0);
1010 
1011 	switch (qos_tier) {
1012 	case THREAD_QOS_UNSPECIFIED:
1013 		call->tc_index = THREAD_CALL_INDEX_HIGH;
1014 		break;
1015 	case THREAD_QOS_LEGACY:
1016 		call->tc_index = THREAD_CALL_INDEX_USER;
1017 		break;
1018 	case THREAD_QOS_MAINTENANCE:
1019 	case THREAD_QOS_BACKGROUND:
1020 		call->tc_index = THREAD_CALL_INDEX_LOW;
1021 		break;
1022 	case THREAD_QOS_UTILITY:
1023 		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
1024 		break;
1025 	case THREAD_QOS_USER_INITIATED:
1026 		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
1027 		break;
1028 	case THREAD_QOS_USER_INTERACTIVE:
1029 		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
1030 		break;
1031 	default:
1032 		panic("Invalid thread call qos value: %d", qos_tier);
1033 		break;
1034 	}
1035 
1036 	if (options & THREAD_CALL_OPTIONS_ONCE) {
1037 		call->tc_flags |= THREAD_CALL_ONCE;
1038 	}
1039 
1040 	/* does not support THREAD_CALL_OPTIONS_SIGNAL */
1041 
1042 	return call;
1043 }
1044 
1045 
1046 /*
1047  *	thread_call_allocate:
1048  *
1049  *	Allocate a callout entry.
1050  */
1051 thread_call_t
thread_call_allocate(thread_call_func_t func,thread_call_param_t param0)1052 thread_call_allocate(
1053 	thread_call_func_t              func,
1054 	thread_call_param_t             param0)
1055 {
1056 	return thread_call_allocate_with_options(func, param0,
1057 	           THREAD_CALL_PRIORITY_HIGH, 0);
1058 }
1059 
/*
 *	thread_call_free:
 *
 *	Release a callout.  If the callout is currently
 *	executing, it will be freed when all invocations
 *	finish.
 *
 *	If the callout is currently armed to fire again, then
 *	freeing is not allowed and returns FALSE.  The
 *	client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
	thread_call_t           call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/* Refuse to free a call that is still enqueued or marked to re-arm itself */
	if (call->tc_queue != NULL ||
	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
		thread_call_unlock(group);
		splx(s);

		return FALSE;
	}

	/* Drop the client's reference; in-flight invocations may still hold refs */
	int32_t refs = --call->tc_refs;
	if (refs < 0) {
		panic("Refcount negative: %d", refs);
	}

	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
		/* A running SIGNAL-type call must be waited for before its storage can go away */
		thread_call_wait_once_locked(call, s);
		/* thread call lock has been unlocked */
	} else {
		enable_ints_and_unlock(group, s);
	}

	if (refs == 0) {
		/* Last reference dropped: every submitted invocation must have finished */
		assert(call->tc_finish_count == call->tc_submit_count);
		zfree(thread_call_zone, call);
	}

	return TRUE;
}
1107 
1108 /*
1109  *	thread_call_enter:
1110  *
1111  *	Enqueue a callout entry to occur "soon".
1112  *
1113  *	Returns TRUE if the call was
1114  *	already on a queue.
1115  */
1116 boolean_t
thread_call_enter(thread_call_t call)1117 thread_call_enter(
1118 	thread_call_t           call)
1119 {
1120 	return thread_call_enter1(call, 0);
1121 }
1122 
1123 boolean_t
thread_call_enter1(thread_call_t call,thread_call_param_t param1)1124 thread_call_enter1(
1125 	thread_call_t                   call,
1126 	thread_call_param_t             param1)
1127 {
1128 	assert(call->tc_func != NULL);
1129 	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
1130 
1131 	thread_call_group_t group = thread_call_get_group(call);
1132 	bool result = true;
1133 
1134 	spl_t s = disable_ints_and_lock(group);
1135 
1136 	if (call->tc_queue != &group->pending_queue) {
1137 		result = _pending_call_enqueue(call, group, mach_absolute_time());
1138 	}
1139 
1140 	call->tc_param1 = param1;
1141 
1142 	enable_ints_and_unlock(group, s);
1143 
1144 	return result;
1145 }
1146 
1147 /*
1148  *	thread_call_enter_delayed:
1149  *
1150  *	Enqueue a callout entry to occur
1151  *	at the stated time.
1152  *
1153  *	Returns TRUE if the call was
1154  *	already on a queue.
1155  */
1156 boolean_t
thread_call_enter_delayed(thread_call_t call,uint64_t deadline)1157 thread_call_enter_delayed(
1158 	thread_call_t           call,
1159 	uint64_t                deadline)
1160 {
1161 	assert(call != NULL);
1162 	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
1163 }
1164 
1165 boolean_t
thread_call_enter1_delayed(thread_call_t call,thread_call_param_t param1,uint64_t deadline)1166 thread_call_enter1_delayed(
1167 	thread_call_t                   call,
1168 	thread_call_param_t             param1,
1169 	uint64_t                        deadline)
1170 {
1171 	assert(call != NULL);
1172 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
1173 }
1174 
1175 boolean_t
thread_call_enter_delayed_with_leeway(thread_call_t call,thread_call_param_t param1,uint64_t deadline,uint64_t leeway,unsigned int flags)1176 thread_call_enter_delayed_with_leeway(
1177 	thread_call_t           call,
1178 	thread_call_param_t     param1,
1179 	uint64_t                deadline,
1180 	uint64_t                leeway,
1181 	unsigned int            flags)
1182 {
1183 	assert(call != NULL);
1184 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
1185 }
1186 
1187 
/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * params:
 * call     - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway   - timer slack represented as delta of deadline.
 * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 *                                                                        than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
	thread_call_t           call,
	thread_call_func_t      alt_func,
	thread_call_param_t     alt_param0,
	thread_call_param_t     param1,
	uint64_t                deadline,
	uint64_t                leeway,
	unsigned int            flags)
{
	uint64_t                now, sdeadline;

	/* Which timebase governs this call: continuous (counts sleep) or absolute */
	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	assert(call->tc_func != NULL);
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * kevent and IOTES let you change flavor for an existing timer, so we have to
	 * support flipping flavors for enqueued thread calls.
	 */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	call->tc_flags |= THREAD_CALL_DELAYED;

	/* soft deadline is the caller's requested time, before coalescing slop is added */
	call->tc_soft_deadline = sdeadline = deadline;

	/* Ask the timer subsystem how much coalescing slop this deadline may absorb */
	boolean_t ratelimited = FALSE;
	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

	/* A caller-provided leeway can only widen the window, never narrow it */
	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	/* Clamp instead of wrapping when deadline + slop would overflow */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (ratelimited) {
		call->tc_flags |= THREAD_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
	}

	call->tc_param1 = param1;

	/* time-to-deadline, kept for tracing; zero if the deadline is already past */
	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

	/* Enqueue on the delayed queue (hard deadline includes slop) and arm the timer */
	bool result = _delayed_call_enqueue(call, group, deadline, flavor);

	_arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

	enable_ints_and_unlock(group, s);

	return result;
}
1281 
/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 *
 * Returns true if the call was removed (or its pending reschedule
 * request was cleared), false if it was not queued.
 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
	bool canceled;

	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
		/* The call was due to be re-armed on finish; clearing the flag cancels that */
		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
		canceled = true;

		/* if reschedule was set, it must not have been queued */
		assert(call->tc_queue == NULL);
	} else {
		bool queue_head_changed = false;

		thread_call_flavor_t flavor = thread_call_get_flavor(call);
		thread_call_group_t  group  = thread_call_get_group(call);

		/*
		 * If this call is the earliest delayed entry, removing it changes
		 * the deadline the hardware timer is armed for.
		 */
		if (call->tc_pqlink.deadline != 0 &&
		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
			assert(call->tc_queue == &group->delayed_queues[flavor]);
			queue_head_changed = true;
		}

		canceled = _call_dequeue(call, group);

		if (queue_head_changed) {
			/* Re-arm for the new earliest entry, or cancel the timer if none remain */
			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
				timer_call_cancel(&group->delayed_timers[flavor]);
			}
		}
	}

#if CONFIG_DTRACE
	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

	return canceled;
}
1325 
1326 /*
1327  *	thread_call_cancel:
1328  *
1329  *	Dequeue a callout entry.
1330  *
1331  *	Returns TRUE if the call was
1332  *	on a queue.
1333  */
1334 boolean_t
thread_call_cancel(thread_call_t call)1335 thread_call_cancel(thread_call_t call)
1336 {
1337 	thread_call_group_t group = thread_call_get_group(call);
1338 
1339 	spl_t s = disable_ints_and_lock(group);
1340 
1341 	boolean_t result = thread_call_cancel_locked(call);
1342 
1343 	enable_ints_and_unlock(group, s);
1344 
1345 	return result;
1346 }
1347 
/*
 * Cancel a thread call.  If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish.  Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	/* Waiting only makes sense on calls whose storage this subsystem owns */
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
	}

	/* Blocking wait is illegal in interrupt context */
	if (!ml_get_interrupts_enabled()) {
		panic("unsafe thread_call_cancel_wait");
	}

	thread_t self = current_thread();

	/* A callout thread waiting on its own in-progress call would deadlock */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	spl_t s = disable_ints_and_lock(group);

	boolean_t canceled = thread_call_cancel_locked(call);

	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
		/*
		 * A cancel-wait on a 'once' call will both cancel
		 * the pending call and wait for the in-flight call
		 */

		thread_call_wait_once_locked(call, s);
		/* thread call lock unlocked */
	} else {
		/*
		 * A cancel-wait on a normal call will only wait for the in-flight calls
		 * if it did not cancel the pending call.
		 *
		 * TODO: This seems less than useful - shouldn't it do the wait as well?
		 */

		if (canceled == FALSE) {
			thread_call_wait_locked(call, s);
			/* thread call lock unlocked */
		} else {
			enable_ints_and_unlock(group, s);
		}
	}

	return canceled;
}
1407 
1408 
/*
 *	thread_call_wake:
 *
 *	Wake a call thread to service
 *	pending call entries.  May wake
 *	the daemon thread in order to
 *	create additional call threads.
 *
 *	Called with thread_call_lock held.
 *
 *	For high-priority group, only does wakeup/creation if there are no threads
 *	running.
 */
static void
thread_call_wake(
	thread_call_group_t             group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (group->idle_count) {
			__assert_only kern_return_t kr;

			/* Wake exactly one idle worker and account it active immediately */
			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			assert(kr == KERN_SUCCESS);

			group->idle_count--;
			group->active_count++;

			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
				/* No idle threads remain, so the pending dealloc timer is moot; try to cancel it */
				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
				}
			}
		} else {
			/*
			 * No idle workers: ask the daemon to create one, but only if
			 * it isn't already awake (the cmpxchg suppresses redundant wakeups)
			 */
			if (thread_call_group_should_add_thread(group) &&
			    os_atomic_cmpxchg(&thread_call_daemon_awake,
			    false, true, relaxed)) {
				waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
				    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	}
}
1456 
/*
 *	sched_call_thread:
 *
 *	Call out invoked by the scheduler.
 *
 *	Keeps the owning group's active/blocked accounting in sync as a
 *	callout worker blocks and unblocks, and wakes another worker when
 *	work remains pending while this one blocks.
 */
static void
sched_call_thread(
	int                             type,
	thread_t                thread)
{
	thread_call_group_t             group;

	/* Only callout worker threads register this sched call */
	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
	assert(thread->thc_state != NULL);

	group = thread->thc_state->thc_group;
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);

	thread_call_lock_spin(group);

	switch (type) {
	case SCHED_CALL_BLOCK:
		/* Worker blocked mid-callout: move it from active to blocked */
		assert(group->active_count);
		--group->active_count;
		group->blocked_count++;
		if (group->pending_count > 0) {
			/* Work is still queued; try to get another worker running */
			thread_call_wake(group);
		}
		break;

	case SCHED_CALL_UNBLOCK:
		/* Worker resumed: move it back from blocked to active */
		assert(group->blocked_count);
		--group->blocked_count;
		group->active_count++;
		break;
	}

	thread_call_unlock(group);
}
1496 
/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own.  Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 *
 * Returns true if a SIGNAL-type call should be re-pended by the caller.
 * May temporarily drop and re-acquire the group lock (via *s).
 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
	assert(thread_call_get_group(call) == group);

	bool repend = false;
	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

	call->tc_finish_count++;

	if (!signal && alloc) {
		/* The thread call thread owns a ref until the call is finished */
		if (call->tc_refs <= 0) {
			panic("thread_call_finish: detected over-released thread call: %p", call);
		}
		call->tc_refs--;
	}

	/* Snapshot the flags before clearing the per-invocation state bits */
	thread_call_flags_t old_flags = call->tc_flags;
	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

	/* Re-arm the call if a reschedule was requested while it ran (and it isn't being freed) */
	if ((!alloc || call->tc_refs != 0) &&
	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
		assert(old_flags & THREAD_CALL_ONCE);
		thread_call_flavor_t flavor = thread_call_get_flavor(call);

		if (old_flags & THREAD_CALL_DELAYED) {
			uint64_t now = mach_absolute_time();
			if (flavor == TCF_CONTINUOUS) {
				now = absolutetime_to_continuoustime(now);
			}
			if (call->tc_soft_deadline <= now) {
				/* The deadline has already expired, go straight to pending */
				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
				call->tc_pqlink.deadline = 0;
			}
		}

		if (call->tc_pqlink.deadline) {
			/* Deadline still in the future: back onto the delayed queue */
			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
			if (!signal) {
				_arm_delayed_call_timer(call, group, flavor);
			}
		} else if (signal) {
			/* SIGNAL calls are re-pended by the caller, not queued here */
			call->tc_submit_count++;
			repend = true;
		} else {
			_pending_call_enqueue(call, group, mach_absolute_time());
		}
	}

	if (!signal && alloc && call->tc_refs == 0) {
		/* Last reference: free the storage (requires dropping the lock around zfree) */
		if ((old_flags & THREAD_CALL_WAIT) != 0) {
			panic("Someone waiting on a thread call that is scheduled for free: %p", call->tc_func);
		}

		assert(call->tc_finish_count == call->tc_submit_count);

		enable_ints_and_unlock(group, *s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock(group);
	}

	if ((old_flags & THREAD_CALL_WAIT) != 0) {
		/*
		 * This may wake up a thread with a registered sched_call.
		 * That call might need the group lock, so we drop the lock
		 * to avoid deadlocking.
		 *
		 * We also must use a separate waitq from the idle waitq, as
		 * this path goes waitq lock->thread lock->group lock, but
		 * the idle wait goes group lock->waitq_lock->thread_lock.
		 */
		thread_call_unlock(group);

		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);

		thread_call_lock_spin(group);
		/* THREAD_CALL_SIGNAL call may have been freed */
	}

	return repend;
}
1590 
/*
 * thread_call_invoke
 *
 * Invoke the function provided for this thread call
 *
 * Note that the thread call object can be deallocated by the function if we do not control its storage.
 *
 * noinline keeps this frame visible in backtraces through the callout.
 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	/* Capture tracing fields up front: 'call' may be freed by func() below */
	uint64_t tc_ttd = call->tc_ttd;
	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

	/* The actual client callout */
	(*func)(param0, param1);

#if CONFIG_DTRACE
	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
1630 
/*
 *	thread_call_thread:
 *
 *	Body (and continuation) of a callout worker thread: drain the
 *	group's pending queue, invoking each call with the lock dropped,
 *	then either park on the idle waitq or terminate.
 */
static void
thread_call_thread(
	thread_call_group_t             group,
	wait_result_t                   wres)
{
	thread_t self = current_thread();

	/* First pass through: mark this thread as a callout worker */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
	}

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	spl_t s = disable_ints_and_lock(group);

	/* Per-invocation bookkeeping lives on this thread's stack */
	struct thread_call_thread_state thc_state = { .thc_group = group };
	self->thc_state = &thc_state;

	/* Register the block/unblock accounting callback with the scheduler */
	thread_sched_call(self, sched_call_thread);

	while (group->pending_count > 0) {
		thread_call_t call = qe_dequeue_head(&group->pending_queue,
		    struct thread_call, tc_qlink);
		assert(call != NULL);

		group->pending_count--;
		if (group->pending_count == 0) {
			assert(queue_empty(&group->pending_queue));
		}

		/* Copy out what we need before the call can be recycled or freed */
		thread_call_func_t  func   = call->tc_func;
		thread_call_param_t param0 = call->tc_param0;
		thread_call_param_t param1 = call->tc_param1;

		call->tc_queue = NULL;

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		bool needs_finish = false;
		if (call->tc_flags & THREAD_CALL_ALLOC) {
			call->tc_refs++;        /* Delay free until we're done */
		}
		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
			/*
			 * If THREAD_CALL_ONCE is used, and the timer wasn't
			 * THREAD_CALL_ALLOC, then clients swear they will use
			 * thread_call_cancel_wait() before destroying
			 * the thread call.
			 *
			 * Else, the storage for the thread call might have
			 * disappeared when thread_call_invoke() ran.
			 */
			needs_finish = true;
			call->tc_flags |= THREAD_CALL_RUNNING;
		}

		/* Record the invocation details for debugging/introspection */
		thc_state.thc_call = call;
		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
		thc_state.thc_func = func;
		thc_state.thc_param0 = param0;
		thc_state.thc_param1 = param1;
		thc_state.thc_IOTES_invocation_timestamp = 0;

		/* Drop the lock across the client callout */
		enable_ints_and_unlock(group, s);

		thc_state.thc_call_start = mach_absolute_time();

		thread_call_invoke(func, param0, param1, call);

		thc_state.thc_call = NULL;

		/* Callouts must not return with preemption disabled */
		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock(group);

		if (needs_finish) {
			/* Release refcount, may free, may temporarily drop lock */
			thread_call_finish(call, group, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	/* Charge interrupt-wakeup ledgers if this worker was woken from interrupt context */
	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle) {
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	self->thc_state = NULL;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait?");
		}

		enable_ints_and_unlock(group, s);

		/* Block, resuming in this same function when woken */
		thread_block_parameter((thread_continue_t)thread_call_thread, group);
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(group, s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	/* Surplus worker in a non-parallel group: exit instead of idling */
	enable_ints_and_unlock(group, s);

	thread_terminate(self);
	/* NOTREACHED */
}
1798 
1799 void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)1800 thread_call_start_iotes_invocation(__assert_only thread_call_t call)
1801 {
1802 	thread_t self = current_thread();
1803 
1804 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1805 		/* not a thread call thread, might be a workloop IOTES */
1806 		return;
1807 	}
1808 
1809 	assert(self->thc_state);
1810 	assert(self->thc_state->thc_call == call);
1811 
1812 	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
1813 }
1814 
1815 
/*
 *	thread_call_daemon: walk list of groups, allocating
 *	threads if appropriate (as determined by
 *	thread_call_group_should_add_thread()).
 */
static void
thread_call_daemon_continue(__unused void *arg)
{
	do {
		/* Clear the wakeup flag before scanning; a racing waker will re-set it */
		os_atomic_store(&thread_call_daemon_awake, false, relaxed);

		/* Starting at zero happens to be high-priority first. */
		for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
			thread_call_group_t group = &thread_call_groups[i];

			spl_t s = disable_ints_and_lock(group);

			while (thread_call_group_should_add_thread(group)) {
				group->active_count++;

				/* Thread creation may block; drop the lock around it */
				enable_ints_and_unlock(group, s);

				thread_call_thread_create(group);

				s = disable_ints_and_lock(group);
			}

			enable_ints_and_unlock(group, s);
		}
		/* Loop again if another wakeup arrived while we were scanning */
	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));

	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);

	/* A wakeup between the load above and the assert_wait: don't go to sleep */
	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}
1856 
1857 static void
thread_call_daemon(__unused void * arg)1858 thread_call_daemon(
1859 	__unused void    *arg)
1860 {
1861 	thread_t        self = current_thread();
1862 
1863 	self->options |= TH_OPT_VMPRIV;
1864 	vm_page_free_reserve(2);        /* XXX */
1865 
1866 	thread_set_thread_name(self, "thread_call_daemon");
1867 
1868 	thread_call_daemon_continue(NULL);
1869 	/* NOTREACHED */
1870 }
1871 
1872 /*
1873  * Schedule timer to deallocate a worker thread if we have a surplus
1874  * of threads (in excess of the group's target) and at least one thread
1875  * is idle the whole time.
1876  */
1877 static void
thread_call_start_deallocate_timer(thread_call_group_t group)1878 thread_call_start_deallocate_timer(thread_call_group_t group)
1879 {
1880 	__assert_only bool already_enqueued;
1881 
1882 	assert(group->idle_count > 0);
1883 	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);
1884 
1885 	group->tcg_flags |= TCG_DEALLOC_ACTIVE;
1886 
1887 	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1888 
1889 	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);
1890 
1891 	assert(already_enqueued == false);
1892 }
1893 
1894 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
/*
 * Expiration handler for a (group, flavor) delayed-call timer.
 *
 * Pops calls from the flavor's delayed priority queue in deadline order.
 * Each call whose soft deadline has passed is either invoked directly
 * from this context (THREAD_CALL_SIGNAL calls) or moved to the group's
 * pending queue for a worker thread.  Stops at the first call not yet
 * due and re-arms the timer for it.
 *
 * p0: the owning thread_call_group_t
 * p1: the thread_call_flavor_t of the queue that fired
 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t  group  = (thread_call_group_t)  p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	thread_call_t   call;
	uint64_t        now;

	thread_call_lock_spin(group);

	/* Sample "now" on the clock basis matching this queue's flavor. */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * if we hit a call that isn't yet ready to expire,
		 * then we're done for now
		 * TODO: The next timer in the list could have a larger leeway
		 *       and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 *       Have a separate rate-limited queue to avoid this
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			/* SIGNAL calls run right here rather than on a worker thread. */
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				/*
				 * Snapshot func/params before dropping the lock:
				 * they may change while the lock is not held.
				 */
				thread_call_func_t  func   = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				/* The group lock cannot be held across the client callout. */
				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			_pending_call_enqueue(call, group, now);
		}
	}

	/* Re-arm for the first not-yet-due call; NULL means the queue drained. */
	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}
1970 
/*
 * Forced re-evaluation of one (group, flavor) delayed queue, run when the
 * platform requests a timer re-sort (ml_timer_forced_evaluation).  Calls
 * already past their soft deadline become pending; still-armed calls whose
 * coalescing slop exceeds the resort threshold are re-enqueued with their
 * deadline crushed down to the soft deadline.
 */
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	/* This path is only legal during a forced evaluation pass. */
	assert(ml_timer_forced_evaluation() == TRUE);

	/* Sample "now" on the clock basis matching this queue's flavor. */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			/* Already due: hand off to the pending queue. */
			_pending_call_enqueue(call, group, now);
		} else {
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal soft-deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
			if (timer_resort_threshold(skew)) {
				/* Re-enqueue with deadline == soft deadline (no slop). */
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}
2021 
2022 void
thread_call_delayed_timer_rescan_all(void)2023 thread_call_delayed_timer_rescan_all(void)
2024 {
2025 	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2026 		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2027 			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2028 		}
2029 	}
2030 }
2031 
2032 /*
2033  * Timer callback to tell a thread to terminate if
2034  * we have an excess of threads and at least one has been
2035  * idle for a long time.
2036  */
/*
 * Deallocate-timer callback for a group (p0).  If a thread has been idle
 * for the full dealloc interval, wake one idle thread for termination.
 * Re-arms itself while the group still has surplus threads.
 */
static void
thread_call_dealloc_timer(
	timer_call_param_t              p0,
	__unused timer_call_param_t     p1)
{
	thread_call_group_t group = (thread_call_group_t)p0;
	uint64_t now;
	kern_return_t res;
	bool terminated = false;

	thread_call_lock_spin(group);

	/* Must have been armed via thread_call_start_deallocate_timer(). */
	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);

	now = mach_absolute_time();

	if (group->idle_count > 0) {
		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
			terminated = true;
			group->idle_count--;
			/*
			 * Wake with THREAD_INTERRUPTED so the idle thread
			 * terminates instead of looking for more work.
			 */
			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
			if (res != KERN_SUCCESS) {
				panic("Unable to wake up idle thread for termination?");
			}
		}
	}

	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;

	/*
	 * If we still have an excess of threads, schedule another
	 * invocation of this function.
	 */
	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
		/*
		 * If we killed someone just now, push out the
		 * next deadline.
		 */
		if (terminated) {
			group->idle_timestamp = now;
		}

		thread_call_start_deallocate_timer(group);
	}

	thread_call_unlock(group);
}
2085 
2086 /*
2087  * Wait for the invocation of the thread call to complete
2088  * We know there's only one in flight because of the 'once' flag.
2089  *
2090  * If a subsequent invocation comes in before we wake up, that's OK
2091  *
2092  * TODO: Here is where we will add priority inheritance to the thread executing
2093  * the thread call in case it's lower priority than the current thread
2094  *      <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2095  *
2096  * Takes the thread call lock locked, returns unlocked
2097  *      This lets us avoid a spurious take/drop after waking up from thread_block
2098  *
2099  * This thread could be a thread call thread itself, blocking and therefore making a
2100  * sched_call upcall into the thread call subsystem, needing the group lock.
2101  * However, we're saved from deadlock because the 'block' upcall is made in
2102  * thread_block, not in assert_wait.
2103  */
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
{
	assert(call->tc_flags & THREAD_CALL_ALLOC);
	assert(call->tc_flags & THREAD_CALL_ONCE);

	thread_call_group_t group = thread_call_get_group(call);

	/* Not currently executing: nothing to wait for. */
	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
		enable_ints_and_unlock(group, s);
		return false;
	}

	/* call is running, so we have to wait for it */
	call->tc_flags |= THREAD_CALL_WAIT;

	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
	if (res != THREAD_WAITING) {
		panic("Unable to assert wait: %d", res);
	}

	/* Safe to drop the lock now: the wait is already asserted. */
	enable_ints_and_unlock(group, s);

	res = thread_block(THREAD_CONTINUE_NULL);
	if (res != THREAD_AWAKENED) {
		panic("Awoken with %d?", res);
	}

	/* returns unlocked */
	return true;
}
2135 
2136 /*
2137  * Wait for an in-flight invocation to complete
2138  * Does NOT try to cancel, so the client doesn't need to hold their
2139  * lock while calling this function.
2140  *
2141  * Returns whether or not it had to wait.
2142  *
2143  * Only works for THREAD_CALL_ONCE calls.
2144  */
2145 boolean_t
thread_call_wait_once(thread_call_t call)2146 thread_call_wait_once(thread_call_t call)
2147 {
2148 	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2149 		panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
2150 	}
2151 
2152 	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2153 		panic("thread_call_wait_once: can't wait_once on a non-once call");
2154 	}
2155 
2156 	if (!ml_get_interrupts_enabled()) {
2157 		panic("unsafe thread_call_wait_once");
2158 	}
2159 
2160 	thread_t self = current_thread();
2161 
2162 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2163 	    self->thc_state && self->thc_state->thc_call == call) {
2164 		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2165 		    call, call->tc_func);
2166 	}
2167 
2168 	thread_call_group_t group = thread_call_get_group(call);
2169 
2170 	spl_t s = disable_ints_and_lock(group);
2171 
2172 	bool waited = thread_call_wait_once_locked(call, s);
2173 	/* thread call lock unlocked */
2174 
2175 	return waited;
2176 }
2177 
2178 
2179 /*
2180  * Wait for all requested invocations of a thread call prior to now
2181  * to finish.  Can only be invoked on thread calls whose storage we manage.
2182  * Just waits for the finish count to catch up to the submit count we find
2183  * at the beginning of our wait.
2184  *
2185  * Called with thread_call_lock held.  Returns with lock released.
2186  */
static void
thread_call_wait_locked(thread_call_t call, spl_t s)
{
	thread_call_group_t group = thread_call_get_group(call);

	assert(call->tc_flags & THREAD_CALL_ALLOC);

	/* Snapshot: only wait out invocations submitted before we started. */
	uint64_t submit_count = call->tc_submit_count;

	while (call->tc_finish_count < submit_count) {
		/*
		 * NOTE(review): THREAD_CALL_WAIT appears to signal the finish
		 * path to wake waiters_waitq — confirm in thread_call_finish.
		 */
		call->tc_flags |= THREAD_CALL_WAIT;

		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
		    CAST_EVENT64_T(call), THREAD_UNINT, 0);

		if (res != THREAD_WAITING) {
			panic("Unable to assert wait: %d", res);
		}

		/* Safe to drop the lock now: the wait is already asserted. */
		enable_ints_and_unlock(group, s);

		res = thread_block(THREAD_CONTINUE_NULL);
		if (res != THREAD_AWAKENED) {
			panic("Awoken with %d?", res);
		}

		/* Re-take the lock to re-check the finish count. */
		s = disable_ints_and_lock(group);
	}

	enable_ints_and_unlock(group, s);
}
2218 
2219 /*
2220  * Determine whether a thread call is either on a queue or
2221  * currently being executed.
2222  */
2223 boolean_t
thread_call_isactive(thread_call_t call)2224 thread_call_isactive(thread_call_t call)
2225 {
2226 	thread_call_group_t group = thread_call_get_group(call);
2227 
2228 	spl_t s = disable_ints_and_lock(group);
2229 	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2230 	enable_ints_and_unlock(group, s);
2231 
2232 	return active;
2233 }
2234 
2235 /*
2236  * adjust_cont_time_thread_calls
2237  * on wake, reenqueue delayed call timer for continuous time thread call groups
2238  */
2239 void
adjust_cont_time_thread_calls(void)2240 adjust_cont_time_thread_calls(void)
2241 {
2242 	for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2243 		thread_call_group_t group = &thread_call_groups[i];
2244 		spl_t s = disable_ints_and_lock(group);
2245 
2246 		/* only the continuous timers need to be re-armed */
2247 
2248 		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2249 		enable_ints_and_unlock(group, s);
2250 	}
2251 }
2252