xref: /xnu-12377.41.6/osfmk/kern/thread_call.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41 
42 #include <vm/vm_pageout_xnu.h>
43 
44 #include <kern/thread_call.h>
45 #include <kern/timer_call.h>
46 
47 #include <libkern/OSAtomic.h>
48 #include <kern/timer_queue.h>
49 
50 #include <sys/kdebug.h>
51 #if CONFIG_DTRACE
52 #include <mach/sdt.h>
53 #endif
54 #include <machine/machine_routines.h>
55 
56 static KALLOC_TYPE_DEFINE(thread_call_zone, thread_call_data_t,
57     KT_PRIV_ACCT | KT_NOEARLY);
58 
59 typedef enum {
60 	TCF_ABSOLUTE    = 0,
61 	TCF_CONTINUOUS  = 1,
62 	TCF_COUNT       = 2,
63 } thread_call_flavor_t;
64 
65 __options_decl(thread_call_group_flags_t, uint32_t, {
66 	TCG_NONE                = 0x0,
67 	TCG_PARALLEL            = 0x1,
68 	TCG_DEALLOC_ACTIVE      = 0x2,
69 });
70 
71 static struct thread_call_group {
72 	__attribute__((aligned(128))) lck_ticket_t tcg_lock;
73 
74 	const char *            tcg_name;
75 
76 	queue_head_t            pending_queue;
77 	uint32_t                pending_count;
78 
79 	queue_head_t            delayed_queues[TCF_COUNT];
80 	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
81 	timer_call_data_t       delayed_timers[TCF_COUNT];
82 
83 	timer_call_data_t       dealloc_timer;
84 
85 	struct waitq            idle_waitq;
86 	uint64_t                idle_timestamp;
87 	uint32_t                idle_count, active_count, blocked_count;
88 
89 	uint32_t                tcg_thread_pri;
90 	uint32_t                target_thread_count;
91 
92 	thread_call_group_flags_t tcg_flags;
93 
94 	struct waitq            waiters_waitq;
95 } thread_call_groups[THREAD_CALL_INDEX_MAX] = {
96 	[THREAD_CALL_INDEX_INVALID] = {
97 		.tcg_name               = "invalid",
98 	},
99 	[THREAD_CALL_INDEX_HIGH] = {
100 		.tcg_name               = "high",
101 		.tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
102 		.target_thread_count    = 4,
103 		.tcg_flags              = TCG_NONE,
104 	},
105 	[THREAD_CALL_INDEX_KERNEL] = {
106 		.tcg_name               = "kernel",
107 		.tcg_thread_pri         = BASEPRI_KERNEL,
108 		.target_thread_count    = 1,
109 		.tcg_flags              = TCG_PARALLEL,
110 	},
111 	[THREAD_CALL_INDEX_USER] = {
112 		.tcg_name               = "user",
113 		.tcg_thread_pri         = BASEPRI_DEFAULT,
114 		.target_thread_count    = 1,
115 		.tcg_flags              = TCG_PARALLEL,
116 	},
117 	[THREAD_CALL_INDEX_LOW] = {
118 		.tcg_name               = "low",
119 		.tcg_thread_pri         = MAXPRI_THROTTLE,
120 		.target_thread_count    = 1,
121 		.tcg_flags              = TCG_PARALLEL,
122 	},
123 	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
124 		.tcg_name               = "kernel-high",
125 		.tcg_thread_pri         = BASEPRI_PREEMPT,
126 		.target_thread_count    = 2,
127 		.tcg_flags              = TCG_NONE,
128 	},
129 	[THREAD_CALL_INDEX_QOS_UI] = {
130 		.tcg_name               = "qos-ui",
131 		.tcg_thread_pri         = BASEPRI_FOREGROUND,
132 		.target_thread_count    = 1,
133 		.tcg_flags              = TCG_NONE,
134 	},
135 	[THREAD_CALL_INDEX_QOS_IN] = {
136 		.tcg_name               = "qos-in",
137 		.tcg_thread_pri         = BASEPRI_USER_INITIATED,
138 		.target_thread_count    = 1,
139 		.tcg_flags              = TCG_NONE,
140 	},
141 	[THREAD_CALL_INDEX_QOS_UT] = {
142 		.tcg_name               = "qos-ut",
143 		.tcg_thread_pri         = BASEPRI_UTILITY,
144 		.target_thread_count    = 1,
145 		.tcg_flags              = TCG_NONE,
146 	},
147 };
148 
149 typedef struct thread_call_group        *thread_call_group_t;
150 
151 #define INTERNAL_CALL_COUNT             768
152 #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
153 #define THREAD_CALL_ADD_RATIO           4
154 #define THREAD_CALL_MACH_FACTOR_CAP     3
155 #define THREAD_CALL_GROUP_MAX_THREADS   500
156 
157 struct thread_call_thread_state {
158 	struct thread_call_group * thc_group;
159 	struct thread_call *       thc_call;    /* debug only, may be deallocated */
160 	uint64_t thc_call_start;
161 	uint64_t thc_call_soft_deadline;
162 	uint64_t thc_call_hard_deadline;
163 	uint64_t thc_call_pending_timestamp;
164 	uint64_t thc_IOTES_invocation_timestamp;
165 	thread_call_func_t  thc_func;
166 	thread_call_param_t thc_param0;
167 	thread_call_param_t thc_param1;
168 };
169 
170 static bool                     thread_call_daemon_awake = true;
171 /*
172  * This special waitq exists because the daemon thread
173  * might need to be woken while already holding a global waitq locked.
174  */
175 static struct waitq             daemon_waitq;
176 
177 static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
178 static queue_head_t             thread_call_internal_queue;
179 int                                             thread_call_internal_queue_count = 0;
180 static uint64_t                 thread_call_dealloc_interval_abs;
181 
182 static void                     _internal_call_init(void);
183 
184 static thread_call_t            _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
185 static bool                     _is_internal_call(thread_call_t call);
186 static void                     _internal_call_release(thread_call_t call);
187 static bool                     _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
188 static bool                     _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
189     uint64_t deadline, thread_call_flavor_t flavor);
190 static bool                     _call_dequeue(thread_call_t call, thread_call_group_t group);
191 static void                     thread_call_wake(thread_call_group_t group);
192 static void                     thread_call_daemon(void *arg, wait_result_t w);
193 static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
194 static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
195 static void                     thread_call_group_setup(thread_call_group_t group);
196 static void                     sched_call_thread(int type, thread_t thread);
197 static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
198 static void                     thread_call_wait_locked(thread_call_t call, spl_t s);
199 static bool                     thread_call_wait_once_locked(thread_call_t call, spl_t s);
200 
201 static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
202     thread_call_func_t alt_func, thread_call_param_t alt_param0,
203     thread_call_param_t param1, uint64_t deadline,
204     uint64_t leeway, unsigned int flags);
205 
206 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
207 extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
208 
209 LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
210 
211 
212 static void
thread_call_lock_spin(thread_call_group_t group)213 thread_call_lock_spin(thread_call_group_t group)
214 {
215 	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
216 }
217 
218 static void
thread_call_unlock(thread_call_group_t group)219 thread_call_unlock(thread_call_group_t group)
220 {
221 	lck_ticket_unlock(&group->tcg_lock);
222 }
223 
224 static void __assert_only
thread_call_assert_locked(thread_call_group_t group)225 thread_call_assert_locked(thread_call_group_t group)
226 {
227 	lck_ticket_assert_owned(&group->tcg_lock);
228 }
229 
230 
231 static spl_t
disable_ints_and_lock(thread_call_group_t group)232 disable_ints_and_lock(thread_call_group_t group)
233 {
234 	spl_t s = splsched();
235 	thread_call_lock_spin(group);
236 
237 	return s;
238 }
239 
240 static void
enable_ints_and_unlock(thread_call_group_t group,spl_t s)241 enable_ints_and_unlock(thread_call_group_t group, spl_t s)
242 {
243 	thread_call_unlock(group);
244 	splx(s);
245 }
246 
247 static thread_call_group_t
thread_call_get_group(thread_call_t call)248 thread_call_get_group(thread_call_t call)
249 {
250 	thread_call_index_t index = call->tc_index;
251 	thread_call_flags_t flags = call->tc_flags;
252 	thread_call_func_t  func  = call->tc_func;
253 
254 	if (index == THREAD_CALL_INDEX_INVALID || index >= THREAD_CALL_INDEX_MAX) {
255 		panic("(%p %p) invalid thread call index: %d", call, func, index);
256 	}
257 
258 	if (func == NULL || !(flags & THREAD_CALL_INITIALIZED)) {
259 		panic("(%p %p) uninitialized thread call", call, func);
260 	}
261 
262 	if (flags & THREAD_CALL_ALLOC) {
263 		kalloc_type_require(thread_call_data_t, call);
264 	}
265 
266 	return &thread_call_groups[index];
267 }
268 
269 /* Lock held */
270 static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)271 thread_call_get_flavor(thread_call_t call)
272 {
273 	return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
274 }
275 
276 /* Lock held */
277 static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call,thread_call_flavor_t flavor)278 thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
279 {
280 	assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
281 	thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
282 
283 	if (old_flavor != flavor) {
284 		if (flavor == TCF_CONTINUOUS) {
285 			call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
286 		} else {
287 			call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
288 		}
289 	}
290 
291 	return old_flavor;
292 }
293 
294 /* returns true if it was on a queue */
295 static bool
thread_call_enqueue_tail(thread_call_t call,queue_t new_queue)296 thread_call_enqueue_tail(
297 	thread_call_t           call,
298 	queue_t                 new_queue)
299 {
300 	queue_t                 old_queue = call->tc_queue;
301 
302 	thread_call_group_t     group = thread_call_get_group(call);
303 	thread_call_flavor_t    flavor = thread_call_get_flavor(call);
304 
305 	if (old_queue != NULL &&
306 	    old_queue != &group->delayed_queues[flavor]) {
307 		panic("thread call (%p %p) on bad queue (old_queue: %p)",
308 		    call, call->tc_func, old_queue);
309 	}
310 
311 	if (old_queue == &group->delayed_queues[flavor]) {
312 		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
313 	}
314 
315 	if (old_queue == NULL) {
316 		enqueue_tail(new_queue, &call->tc_qlink);
317 	} else {
318 		re_queue_tail(new_queue, &call->tc_qlink);
319 	}
320 
321 	call->tc_queue = new_queue;
322 
323 	return old_queue != NULL;
324 }
325 
326 static queue_head_t *
thread_call_dequeue(thread_call_t call)327 thread_call_dequeue(
328 	thread_call_t            call)
329 {
330 	queue_t                 old_queue = call->tc_queue;
331 
332 	thread_call_group_t     group = thread_call_get_group(call);
333 	thread_call_flavor_t    flavor = thread_call_get_flavor(call);
334 
335 	if (old_queue != NULL &&
336 	    old_queue != &group->pending_queue &&
337 	    old_queue != &group->delayed_queues[flavor]) {
338 		panic("thread call (%p %p) on bad queue (old_queue: %p)",
339 		    call, call->tc_func, old_queue);
340 	}
341 
342 	if (old_queue == &group->delayed_queues[flavor]) {
343 		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
344 	}
345 
346 	if (old_queue != NULL) {
347 		remqueue(&call->tc_qlink);
348 
349 		call->tc_queue = NULL;
350 	}
351 	return old_queue;
352 }
353 
354 static queue_head_t *
thread_call_enqueue_deadline(thread_call_t call,thread_call_group_t group,thread_call_flavor_t flavor,uint64_t deadline)355 thread_call_enqueue_deadline(
356 	thread_call_t           call,
357 	thread_call_group_t     group,
358 	thread_call_flavor_t    flavor,
359 	uint64_t                deadline)
360 {
361 	queue_t old_queue = call->tc_queue;
362 	queue_t new_queue = &group->delayed_queues[flavor];
363 
364 	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);
365 
366 	if (old_queue != NULL &&
367 	    old_queue != &group->pending_queue &&
368 	    old_queue != &group->delayed_queues[old_flavor]) {
369 		panic("thread call (%p %p) on bad queue (old_queue: %p)",
370 		    call, call->tc_func, old_queue);
371 	}
372 
373 	if (old_queue == new_queue) {
374 		/* optimize the same-queue case to avoid a full re-insert */
375 		uint64_t old_deadline = call->tc_pqlink.deadline;
376 		call->tc_pqlink.deadline = deadline;
377 
378 		if (old_deadline < deadline) {
379 			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
380 			    &call->tc_pqlink);
381 		} else {
382 			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
383 			    &call->tc_pqlink);
384 		}
385 	} else {
386 		if (old_queue == &group->delayed_queues[old_flavor]) {
387 			priority_queue_remove(&group->delayed_pqueues[old_flavor],
388 			    &call->tc_pqlink);
389 		}
390 
391 		call->tc_pqlink.deadline = deadline;
392 
393 		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
394 	}
395 
396 	if (old_queue == NULL) {
397 		enqueue_tail(new_queue, &call->tc_qlink);
398 	} else if (old_queue != new_queue) {
399 		re_queue_tail(new_queue, &call->tc_qlink);
400 	}
401 
402 	call->tc_queue = new_queue;
403 
404 	return old_queue;
405 }
406 
407 uint64_t
thread_call_get_armed_deadline(thread_call_t call)408 thread_call_get_armed_deadline(thread_call_t call)
409 {
410 	return call->tc_pqlink.deadline;
411 }
412 
413 
414 static bool
group_isparallel(thread_call_group_t group)415 group_isparallel(thread_call_group_t group)
416 {
417 	return (group->tcg_flags & TCG_PARALLEL) != 0;
418 }
419 
420 static bool
thread_call_group_should_add_thread(thread_call_group_t group)421 thread_call_group_should_add_thread(thread_call_group_t group)
422 {
423 	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
424 		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
425 		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
426 		    group->active_count, group->blocked_count, group->idle_count);
427 	}
428 
429 	if (group_isparallel(group) == false) {
430 		if (group->pending_count > 0 && group->active_count == 0) {
431 			return true;
432 		}
433 
434 		return false;
435 	}
436 
437 	if (group->pending_count > 0) {
438 		if (group->idle_count > 0) {
439 			return false;
440 		}
441 
442 		uint32_t thread_count = group->active_count;
443 
444 		/*
445 		 * Add a thread if either there are no threads,
446 		 * the group has fewer than its target number of
447 		 * threads, or the amount of work is large relative
448 		 * to the number of threads.  In the last case, pay attention
449 		 * to the total load on the system, and back off if
450 		 * it's high.
451 		 */
452 		if ((thread_count == 0) ||
453 		    (thread_count < group->target_thread_count) ||
454 		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
455 		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
456 			return true;
457 		}
458 	}
459 
460 	return false;
461 }
462 
463 static void
thread_call_group_setup(thread_call_group_t group)464 thread_call_group_setup(thread_call_group_t group)
465 {
466 	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);
467 
468 	queue_init(&group->pending_queue);
469 
470 	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
471 		queue_init(&group->delayed_queues[flavor]);
472 		priority_queue_init(&group->delayed_pqueues[flavor]);
473 		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
474 	}
475 
476 	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
477 
478 	waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
479 
480 	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
481 	waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
482 }
483 
484 /*
485  * Simple wrapper for creating threads bound to
486  * thread call groups.
487  */
488 static void
thread_call_thread_create(thread_call_group_t group)489 thread_call_thread_create(
490 	thread_call_group_t             group)
491 {
492 	thread_t thread;
493 	kern_return_t result;
494 
495 	int thread_pri = group->tcg_thread_pri;
496 
497 	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
498 	    group, thread_pri, &thread);
499 	if (result != KERN_SUCCESS) {
500 		panic("cannot create new thread call thread %d", result);
501 	}
502 
503 	if (thread_pri <= BASEPRI_KERNEL) {
504 		/*
505 		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
506 		 * in kernel if there are higher priority threads available.
507 		 */
508 		thread_set_eager_preempt(thread);
509 	}
510 
511 	char name[MAXTHREADNAMESIZE] = "";
512 
513 	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
514 
515 	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
516 	thread_set_thread_name(thread, name);
517 
518 	thread_deallocate(thread);
519 }
520 
521 /*
522  *	thread_call_initialize:
523  *
524  *	Initialize this module, called
525  *	early during system initialization.
526  */
527 __startup_func
528 static void
thread_call_initialize(void)529 thread_call_initialize(void)
530 {
531 	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
532 	waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
533 
534 	for (uint32_t i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
535 		thread_call_group_setup(&thread_call_groups[i]);
536 	}
537 
538 	_internal_call_init();
539 
540 	thread_t thread;
541 	kern_return_t result;
542 
543 	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
544 	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
545 	if (result != KERN_SUCCESS) {
546 		panic("thread_call_initialize failed (%d)", result);
547 	}
548 
549 	thread_deallocate(thread);
550 }
551 STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);
552 
553 void
thread_call_setup_with_options(thread_call_t call,thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)554 thread_call_setup_with_options(
555 	thread_call_t                   call,
556 	thread_call_func_t              func,
557 	thread_call_param_t             param0,
558 	thread_call_priority_t          pri,
559 	thread_call_options_t           options)
560 {
561 	if (func == NULL) {
562 		panic("initializing thread call with NULL func");
563 	}
564 
565 	bzero(call, sizeof(*call));
566 
567 	*call = (struct thread_call) {
568 		.tc_func = func,
569 		.tc_param0 = param0,
570 		.tc_flags = THREAD_CALL_INITIALIZED,
571 	};
572 
573 	switch (pri) {
574 	case THREAD_CALL_PRIORITY_HIGH:
575 		call->tc_index = THREAD_CALL_INDEX_HIGH;
576 		break;
577 	case THREAD_CALL_PRIORITY_KERNEL:
578 		call->tc_index = THREAD_CALL_INDEX_KERNEL;
579 		break;
580 	case THREAD_CALL_PRIORITY_USER:
581 		call->tc_index = THREAD_CALL_INDEX_USER;
582 		break;
583 	case THREAD_CALL_PRIORITY_LOW:
584 		call->tc_index = THREAD_CALL_INDEX_LOW;
585 		break;
586 	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
587 		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
588 		break;
589 	default:
590 		panic("Invalid thread call pri value: %d", pri);
591 		break;
592 	}
593 
594 	if (options & THREAD_CALL_OPTIONS_ONCE) {
595 		call->tc_flags |= THREAD_CALL_ONCE;
596 	}
597 	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
598 		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
599 	}
600 }
601 
602 void
thread_call_setup(thread_call_t call,thread_call_func_t func,thread_call_param_t param0)603 thread_call_setup(
604 	thread_call_t                   call,
605 	thread_call_func_t              func,
606 	thread_call_param_t             param0)
607 {
608 	thread_call_setup_with_options(call, func, param0,
609 	    THREAD_CALL_PRIORITY_HIGH, 0);
610 }
611 
612 static void
_internal_call_init(void)613 _internal_call_init(void)
614 {
615 	/* Function-only thread calls are only kept in the default HIGH group */
616 	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
617 
618 	spl_t s = disable_ints_and_lock(group);
619 
620 	queue_init(&thread_call_internal_queue);
621 
622 	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
623 		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
624 		thread_call_internal_queue_count++;
625 	}
626 
627 	enable_ints_and_unlock(group, s);
628 }
629 
630 /*
631  *	_internal_call_allocate:
632  *
633  *	Allocate an internal callout entry.
634  *
635  *	Called with thread_call_lock held.
636  */
637 static thread_call_t
_internal_call_allocate(thread_call_func_t func,thread_call_param_t param0)638 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
639 {
640 	/* Function-only thread calls are only kept in the default HIGH group */
641 	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
642 
643 	spl_t s = disable_ints_and_lock(group);
644 
645 	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
646 	    struct thread_call, tc_qlink);
647 
648 	if (call == NULL) {
649 		panic("_internal_call_allocate: thread_call_internal_queue empty");
650 	}
651 
652 	thread_call_internal_queue_count--;
653 
654 	thread_call_setup(call, func, param0);
655 	/* THREAD_CALL_ALLOC not set, do not free back to zone */
656 	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
657 	enable_ints_and_unlock(group, s);
658 
659 	return call;
660 }
661 
662 /* Check if a call is internal and needs to be returned to the internal pool. */
663 static bool
_is_internal_call(thread_call_t call)664 _is_internal_call(thread_call_t call)
665 {
666 	if (call >= internal_call_storage &&
667 	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
668 		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
669 		return true;
670 	}
671 	return false;
672 }
673 
674 /*
675  *	_internal_call_release:
676  *
677  *	Release an internal callout entry which
678  *	is no longer pending (or delayed).
679  *
680  *      Called with thread_call_lock held.
681  */
682 static void
_internal_call_release(thread_call_t call)683 _internal_call_release(thread_call_t call)
684 {
685 	assert(_is_internal_call(call));
686 
687 	thread_call_group_t group = thread_call_get_group(call);
688 
689 	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
690 	thread_call_assert_locked(group);
691 
692 	call->tc_flags &= ~THREAD_CALL_INITIALIZED;
693 
694 	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
695 	thread_call_internal_queue_count++;
696 }
697 
698 /*
699  *	_pending_call_enqueue:
700  *
701  *	Place an entry at the end of the
702  *	pending queue, to be executed soon.
703  *
704  *	Returns TRUE if the entry was already
705  *	on a queue.
706  *
707  *	Called with thread_call_lock held.
708  */
709 static bool
_pending_call_enqueue(thread_call_t call,thread_call_group_t group,uint64_t now)710 _pending_call_enqueue(thread_call_t call,
711     thread_call_group_t group,
712     uint64_t now)
713 {
714 	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
715 	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
716 		call->tc_pqlink.deadline = 0;
717 
718 		thread_call_flags_t flags = call->tc_flags;
719 		call->tc_flags |= THREAD_CALL_RESCHEDULE;
720 
721 		assert(call->tc_queue == NULL);
722 
723 		return flags & THREAD_CALL_RESCHEDULE;
724 	}
725 
726 	call->tc_pending_timestamp = now;
727 
728 	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);
729 
730 	if (!was_on_queue) {
731 		call->tc_submit_count++;
732 	}
733 
734 	group->pending_count++;
735 
736 	thread_call_wake(group);
737 
738 	return was_on_queue;
739 }
740 
741 /*
742  *	_delayed_call_enqueue:
743  *
744  *	Place an entry on the delayed queue,
745  *	after existing entries with an earlier
746  *      (or identical) deadline.
747  *
748  *	Returns TRUE if the entry was already
749  *	on a queue.
750  *
751  *	Called with thread_call_lock held.
752  */
753 static bool
_delayed_call_enqueue(thread_call_t call,thread_call_group_t group,uint64_t deadline,thread_call_flavor_t flavor)754 _delayed_call_enqueue(
755 	thread_call_t           call,
756 	thread_call_group_t     group,
757 	uint64_t                deadline,
758 	thread_call_flavor_t    flavor)
759 {
760 	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
761 	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
762 		call->tc_pqlink.deadline = deadline;
763 
764 		thread_call_flags_t flags = call->tc_flags;
765 		call->tc_flags |= THREAD_CALL_RESCHEDULE;
766 
767 		assert(call->tc_queue == NULL);
768 		thread_call_set_flavor(call, flavor);
769 
770 		return flags & THREAD_CALL_RESCHEDULE;
771 	}
772 
773 	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);
774 
775 	if (old_queue == &group->pending_queue) {
776 		group->pending_count--;
777 	} else if (old_queue == NULL) {
778 		call->tc_submit_count++;
779 	}
780 
781 	return old_queue != NULL;
782 }
783 
784 /*
785  *	_call_dequeue:
786  *
787  *	Remove an entry from a queue.
788  *
789  *	Returns TRUE if the entry was on a queue.
790  *
791  *	Called with thread_call_lock held.
792  */
793 static bool
_call_dequeue(thread_call_t call,thread_call_group_t group)794 _call_dequeue(
795 	thread_call_t           call,
796 	thread_call_group_t     group)
797 {
798 	queue_head_t *old_queue = thread_call_dequeue(call);
799 
800 	if (old_queue == NULL) {
801 		return false;
802 	}
803 
804 	call->tc_finish_count++;
805 
806 	if (old_queue == &group->pending_queue) {
807 		group->pending_count--;
808 	}
809 
810 	return true;
811 }
812 
813 /*
814  * _arm_delayed_call_timer:
815  *
816  * Check if the timer needs to be armed for this flavor,
817  * and if so, arm it.
818  *
819  * If call is non-NULL, only re-arm the timer if the specified call
820  * is the first in the queue.
821  *
822  * Returns true if the timer was armed/re-armed, false if it was left unset
823  * Caller should cancel the timer if need be.
824  *
825  * Called with thread_call_lock held.
826  */
827 static bool
_arm_delayed_call_timer(thread_call_t new_call,thread_call_group_t group,thread_call_flavor_t flavor)828 _arm_delayed_call_timer(thread_call_t           new_call,
829     thread_call_group_t     group,
830     thread_call_flavor_t    flavor)
831 {
832 	/* No calls implies no timer needed */
833 	if (queue_empty(&group->delayed_queues[flavor])) {
834 		return false;
835 	}
836 
837 	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);
838 
839 	/* We only need to change the hard timer if this new call is the first in the list */
840 	if (new_call != NULL && new_call != call) {
841 		return false;
842 	}
843 
844 	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));
845 
846 	uint64_t fire_at = call->tc_soft_deadline;
847 
848 	if (flavor == TCF_CONTINUOUS) {
849 		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
850 		fire_at = continuoustime_to_absolutetime(fire_at);
851 	} else {
852 		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
853 	}
854 
855 	/*
856 	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
857 	 * which does not take into account later-deadline timers with a larger leeway.
858 	 * This is a valid coalescing behavior, but masks a possible window to
859 	 * fire a timer instead of going idle.
860 	 */
861 	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;
862 
863 	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
864 	    fire_at, leeway,
865 	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
866 	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
867 
868 	return true;
869 }
870 
871 /*
872  *	_cancel_func_from_queue:
873  *
874  *	Remove the first (or all) matching
875  *	entries from the specified queue.
876  *
877  *	Returns TRUE if any matching entries
878  *	were found.
879  *
880  *	Called with thread_call_lock held.
881  */
882 static boolean_t
_cancel_func_from_queue(thread_call_func_t func,thread_call_param_t param0,thread_call_group_t group,boolean_t remove_all,queue_head_t * queue)883 _cancel_func_from_queue(thread_call_func_t      func,
884     thread_call_param_t     param0,
885     thread_call_group_t     group,
886     boolean_t               remove_all,
887     queue_head_t            *queue)
888 {
889 	boolean_t call_removed = FALSE;
890 	thread_call_t call;
891 
892 	qe_foreach_element_safe(call, queue, tc_qlink) {
893 		if (call->tc_func != func ||
894 		    call->tc_param0 != param0) {
895 			continue;
896 		}
897 
898 		_call_dequeue(call, group);
899 
900 		if (_is_internal_call(call)) {
901 			_internal_call_release(call);
902 		}
903 
904 		call_removed = TRUE;
905 		if (!remove_all) {
906 			break;
907 		}
908 	}
909 
910 	return call_removed;
911 }
912 
913 /*
914  *	thread_call_func_delayed:
915  *
916  *	Enqueue a function callout to
917  *	occur at the stated time.
918  */
919 void
thread_call_func_delayed(thread_call_func_t func,thread_call_param_t param,uint64_t deadline)920 thread_call_func_delayed(
921 	thread_call_func_t              func,
922 	thread_call_param_t             param,
923 	uint64_t                        deadline)
924 {
925 	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
926 }
927 
928 /*
929  * thread_call_func_delayed_with_leeway:
930  *
931  * Same as thread_call_func_delayed(), but with
932  * leeway/flags threaded through.
933  */
934 
935 void
thread_call_func_delayed_with_leeway(thread_call_func_t func,thread_call_param_t param,uint64_t deadline,uint64_t leeway,uint32_t flags)936 thread_call_func_delayed_with_leeway(
937 	thread_call_func_t              func,
938 	thread_call_param_t             param,
939 	uint64_t                deadline,
940 	uint64_t                leeway,
941 	uint32_t                flags)
942 {
943 	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
944 }
945 
946 /*
947  *	thread_call_func_cancel:
948  *
949  *	Dequeue a function callout.
950  *
951  *	Removes one (or all) { function, argument }
952  *	instance(s) from either (or both)
953  *	the pending and	the delayed queue,
954  *	in that order.
955  *
956  *	Returns TRUE if any calls were cancelled.
957  *
958  *	This iterates all of the pending or delayed thread calls in the group,
959  *	which is really inefficient.  Switch to an allocated thread call instead.
960  *
961  *	TODO: Give 'func' thread calls their own group, so this silliness doesn't
962  *	affect the main 'high' group.
963  */
964 boolean_t
thread_call_func_cancel(thread_call_func_t func,thread_call_param_t param,boolean_t cancel_all)965 thread_call_func_cancel(
966 	thread_call_func_t              func,
967 	thread_call_param_t             param,
968 	boolean_t                       cancel_all)
969 {
970 	boolean_t       result;
971 
972 	if (func == NULL) {
973 		panic("trying to cancel NULL func");
974 	}
975 
976 	/* Function-only thread calls are only kept in the default HIGH group */
977 	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
978 
979 	spl_t s = disable_ints_and_lock(group);
980 
981 	if (cancel_all) {
982 		/* exhaustively search every queue, and return true if any search found something */
983 		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
984 		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE])  |
985 		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
986 	} else {
987 		/* early-exit as soon as we find something, don't search other queues */
988 		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
989 		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
990 		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
991 	}
992 
993 	enable_ints_and_unlock(group, s);
994 
995 	return result;
996 }
997 
998 /*
999  * Allocate a thread call with a given priority.  Importances other than
1000  * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
1001  * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
1002  * threads which are not in the normal "urgent" bands).
1003  */
1004 thread_call_t
thread_call_allocate_with_priority(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri)1005 thread_call_allocate_with_priority(
1006 	thread_call_func_t              func,
1007 	thread_call_param_t             param0,
1008 	thread_call_priority_t          pri)
1009 {
1010 	return thread_call_allocate_with_options(func, param0, pri, 0);
1011 }
1012 
1013 thread_call_t
thread_call_allocate_with_options(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)1014 thread_call_allocate_with_options(
1015 	thread_call_func_t              func,
1016 	thread_call_param_t             param0,
1017 	thread_call_priority_t          pri,
1018 	thread_call_options_t           options)
1019 {
1020 	thread_call_t call = zalloc(thread_call_zone);
1021 
1022 	thread_call_setup_with_options(call, func, param0, pri, options);
1023 	call->tc_refs = 1;
1024 	call->tc_flags |= THREAD_CALL_ALLOC;
1025 
1026 	return call;
1027 }
1028 
1029 thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,thread_call_param_t param0,int qos_tier,thread_call_options_t options)1030 thread_call_allocate_with_qos(thread_call_func_t        func,
1031     thread_call_param_t       param0,
1032     int                       qos_tier,
1033     thread_call_options_t     options)
1034 {
1035 	thread_call_t call = thread_call_allocate(func, param0);
1036 
1037 	switch (qos_tier) {
1038 	case THREAD_QOS_UNSPECIFIED:
1039 		call->tc_index = THREAD_CALL_INDEX_HIGH;
1040 		break;
1041 	case THREAD_QOS_LEGACY:
1042 		call->tc_index = THREAD_CALL_INDEX_USER;
1043 		break;
1044 	case THREAD_QOS_MAINTENANCE:
1045 	case THREAD_QOS_BACKGROUND:
1046 		call->tc_index = THREAD_CALL_INDEX_LOW;
1047 		break;
1048 	case THREAD_QOS_UTILITY:
1049 		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
1050 		break;
1051 	case THREAD_QOS_USER_INITIATED:
1052 		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
1053 		break;
1054 	case THREAD_QOS_USER_INTERACTIVE:
1055 		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
1056 		break;
1057 	default:
1058 		panic("Invalid thread call qos value: %d", qos_tier);
1059 		break;
1060 	}
1061 
1062 	if (options & THREAD_CALL_OPTIONS_ONCE) {
1063 		call->tc_flags |= THREAD_CALL_ONCE;
1064 	}
1065 
1066 	/* does not support THREAD_CALL_OPTIONS_SIGNAL */
1067 
1068 	return call;
1069 }
1070 
1071 
1072 /*
1073  *	thread_call_allocate:
1074  *
1075  *	Allocate a callout entry.
1076  */
1077 thread_call_t
thread_call_allocate(thread_call_func_t func,thread_call_param_t param0)1078 thread_call_allocate(
1079 	thread_call_func_t              func,
1080 	thread_call_param_t             param0)
1081 {
1082 	return thread_call_allocate_with_options(func, param0,
1083 	           THREAD_CALL_PRIORITY_HIGH, 0);
1084 }
1085 
1086 /*
1087  *	thread_call_free:
1088  *
1089  *	Release a callout.  If the callout is currently
1090  *	executing, it will be freed when all invocations
1091  *	finish.
1092  *
1093  *	If the callout is currently armed to fire again, then
1094  *	freeing is not allowed and returns FALSE.  The
1095  *	client must have canceled the pending invocation before freeing.
1096  */
1097 boolean_t
thread_call_free(thread_call_t call)1098 thread_call_free(
1099 	thread_call_t           call)
1100 {
1101 	thread_call_group_t group = thread_call_get_group(call);
1102 
1103 	spl_t s = disable_ints_and_lock(group);
1104 
1105 	if (call->tc_queue != NULL ||
1106 	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
1107 		thread_call_unlock(group);
1108 		splx(s);
1109 
1110 		return FALSE;
1111 	}
1112 
1113 	int32_t refs = --call->tc_refs;
1114 	if (refs < 0) {
1115 		panic("(%p %p) Refcount negative: %d", call, call->tc_func, refs);
1116 	}
1117 
1118 	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
1119 	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
1120 		thread_call_wait_once_locked(call, s);
1121 		/* thread call lock has been unlocked */
1122 	} else {
1123 		enable_ints_and_unlock(group, s);
1124 	}
1125 
1126 	if (refs == 0) {
1127 		if (!(call->tc_flags & THREAD_CALL_INITIALIZED)) {
1128 			panic("(%p %p) freeing an uninitialized call", call, call->tc_func);
1129 		}
1130 
1131 		if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
1132 			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
1133 			    call, call->tc_func);
1134 		}
1135 
1136 		if (call->tc_flags & THREAD_CALL_RUNNING) {
1137 			panic("(%p %p) freeing a running once call", call, call->tc_func);
1138 		}
1139 
1140 		if (call->tc_finish_count != call->tc_submit_count) {
1141 			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
1142 			    call, call->tc_func,
1143 			    call->tc_submit_count, call->tc_finish_count);
1144 		}
1145 
1146 		call->tc_flags &= ~THREAD_CALL_INITIALIZED;
1147 
1148 		zfree(thread_call_zone, call);
1149 	}
1150 
1151 	return TRUE;
1152 }
1153 
1154 /*
1155  *	thread_call_enter:
1156  *
1157  *	Enqueue a callout entry to occur "soon".
1158  *
1159  *	Returns TRUE if the call was
1160  *	already on a queue.
1161  */
1162 boolean_t
thread_call_enter(thread_call_t call)1163 thread_call_enter(
1164 	thread_call_t           call)
1165 {
1166 	return thread_call_enter1(call, 0);
1167 }
1168 
1169 boolean_t
thread_call_enter1(thread_call_t call,thread_call_param_t param1)1170 thread_call_enter1(
1171 	thread_call_t                   call,
1172 	thread_call_param_t             param1)
1173 {
1174 	if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
1175 		panic("(%p %p) uninitialized thread call", call, call->tc_func);
1176 	}
1177 
1178 	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
1179 
1180 	thread_call_group_t group = thread_call_get_group(call);
1181 	bool result = true;
1182 
1183 	spl_t s = disable_ints_and_lock(group);
1184 
1185 	if (call->tc_queue != &group->pending_queue) {
1186 		result = _pending_call_enqueue(call, group, mach_absolute_time());
1187 	}
1188 
1189 	call->tc_param1 = param1;
1190 
1191 	enable_ints_and_unlock(group, s);
1192 
1193 	return result;
1194 }
1195 
1196 /*
1197  *	thread_call_enter_delayed:
1198  *
1199  *	Enqueue a callout entry to occur
1200  *	at the stated time.
1201  *
1202  *	Returns TRUE if the call was
1203  *	already on a queue.
1204  */
1205 boolean_t
thread_call_enter_delayed(thread_call_t call,uint64_t deadline)1206 thread_call_enter_delayed(
1207 	thread_call_t           call,
1208 	uint64_t                deadline)
1209 {
1210 	if (call == NULL) {
1211 		panic("NULL call in %s", __FUNCTION__);
1212 	}
1213 	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
1214 }
1215 
1216 boolean_t
thread_call_enter1_delayed(thread_call_t call,thread_call_param_t param1,uint64_t deadline)1217 thread_call_enter1_delayed(
1218 	thread_call_t                   call,
1219 	thread_call_param_t             param1,
1220 	uint64_t                        deadline)
1221 {
1222 	if (call == NULL) {
1223 		panic("NULL call in %s", __FUNCTION__);
1224 	}
1225 
1226 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
1227 }
1228 
1229 boolean_t
thread_call_enter_delayed_with_leeway(thread_call_t call,thread_call_param_t param1,uint64_t deadline,uint64_t leeway,unsigned int flags)1230 thread_call_enter_delayed_with_leeway(
1231 	thread_call_t           call,
1232 	thread_call_param_t     param1,
1233 	uint64_t                deadline,
1234 	uint64_t                leeway,
1235 	unsigned int            flags)
1236 {
1237 	if (call == NULL) {
1238 		panic("NULL call in %s", __FUNCTION__);
1239 	}
1240 
1241 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
1242 }
1243 
1244 
1245 /*
1246  * thread_call_enter_delayed_internal:
1247  * enqueue a callout entry to occur at the stated time
1248  *
1249  * Returns True if the call was already on a queue
1250  * params:
1251  * call     - structure encapsulating state of the callout
1252  * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
1253  * deadline - time deadline in nanoseconds
1254  * leeway   - timer slack represented as delta of deadline.
1255  * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1256  *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1257  *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1258  *                                                                        than mach_absolute_time
1259  */
1260 boolean_t
thread_call_enter_delayed_internal(thread_call_t call,thread_call_func_t alt_func,thread_call_param_t alt_param0,thread_call_param_t param1,uint64_t deadline,uint64_t leeway,unsigned int flags)1261 thread_call_enter_delayed_internal(
1262 	thread_call_t           call,
1263 	thread_call_func_t      alt_func,
1264 	thread_call_param_t     alt_param0,
1265 	thread_call_param_t     param1,
1266 	uint64_t                deadline,
1267 	uint64_t                leeway,
1268 	unsigned int            flags)
1269 {
1270 	uint64_t                now, sdeadline;
1271 
1272 	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
1273 
1274 	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
1275 	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);
1276 
1277 	if (call == NULL) {
1278 		/* allocate a structure out of internal storage, as a convenience for BSD callers */
1279 		call = _internal_call_allocate(alt_func, alt_param0);
1280 	}
1281 
1282 	thread_call_group_t group = thread_call_get_group(call);
1283 
1284 	spl_t s = disable_ints_and_lock(group);
1285 
1286 	/*
1287 	 * kevent and IOTES let you change flavor for an existing timer, so we have to
1288 	 * support flipping flavors for enqueued thread calls.
1289 	 */
1290 	if (flavor == TCF_CONTINUOUS) {
1291 		now = mach_continuous_time();
1292 	} else {
1293 		now = mach_absolute_time();
1294 	}
1295 
1296 	call->tc_flags |= THREAD_CALL_DELAYED;
1297 
1298 	call->tc_soft_deadline = sdeadline = deadline;
1299 
1300 	boolean_t ratelimited = FALSE;
1301 	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);
1302 
1303 	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
1304 		slop = leeway;
1305 	}
1306 
1307 	if (UINT64_MAX - deadline <= slop) {
1308 		deadline = UINT64_MAX;
1309 	} else {
1310 		deadline += slop;
1311 	}
1312 
1313 	if (ratelimited) {
1314 		call->tc_flags |= THREAD_CALL_RATELIMITED;
1315 	} else {
1316 		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
1317 	}
1318 
1319 	call->tc_param1 = param1;
1320 
1321 	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;
1322 
1323 	bool result = _delayed_call_enqueue(call, group, deadline, flavor);
1324 
1325 	_arm_delayed_call_timer(call, group, flavor);
1326 
1327 #if CONFIG_DTRACE
1328 	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
1329 	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
1330 	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
1331 #endif
1332 
1333 	enable_ints_and_unlock(group, s);
1334 
1335 	return result;
1336 }
1337 
1338 /*
1339  * Remove a callout entry from the queue
1340  * Called with thread_call_lock held
1341  */
1342 static bool
thread_call_cancel_locked(thread_call_t call)1343 thread_call_cancel_locked(thread_call_t call)
1344 {
1345 	bool canceled;
1346 
1347 	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
1348 		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1349 		canceled = true;
1350 
1351 		/* if reschedule was set, it must not have been queued */
1352 		assert(call->tc_queue == NULL);
1353 	} else {
1354 		bool queue_head_changed = false;
1355 
1356 		thread_call_flavor_t flavor = thread_call_get_flavor(call);
1357 		thread_call_group_t  group  = thread_call_get_group(call);
1358 
1359 		if (call->tc_pqlink.deadline != 0 &&
1360 		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
1361 			assert(call->tc_queue == &group->delayed_queues[flavor]);
1362 			queue_head_changed = true;
1363 		}
1364 
1365 		canceled = _call_dequeue(call, group);
1366 
1367 		if (queue_head_changed) {
1368 			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
1369 				timer_call_cancel(&group->delayed_timers[flavor]);
1370 			}
1371 		}
1372 	}
1373 
1374 #if CONFIG_DTRACE
1375 	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
1376 	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
1377 #endif
1378 
1379 	return canceled;
1380 }
1381 
1382 /*
1383  *	thread_call_cancel:
1384  *
1385  *	Dequeue a callout entry.
1386  *
1387  *	Returns TRUE if the call was
1388  *	on a queue.
1389  */
1390 boolean_t
thread_call_cancel(thread_call_t call)1391 thread_call_cancel(thread_call_t call)
1392 {
1393 	thread_call_group_t group = thread_call_get_group(call);
1394 
1395 	spl_t s = disable_ints_and_lock(group);
1396 
1397 	boolean_t result = thread_call_cancel_locked(call);
1398 
1399 	enable_ints_and_unlock(group, s);
1400 
1401 	return result;
1402 }
1403 
1404 /*
1405  * Cancel a thread call.  If it cannot be cancelled (i.e.
1406  * is already in flight), waits for the most recent invocation
1407  * to finish.  Note that if clients re-submit this thread call,
1408  * it may still be pending or in flight when thread_call_cancel_wait
1409  * returns, but all requests to execute this work item prior
1410  * to the call to thread_call_cancel_wait will have finished.
1411  */
1412 boolean_t
thread_call_cancel_wait(thread_call_t call)1413 thread_call_cancel_wait(thread_call_t call)
1414 {
1415 	thread_call_group_t group = thread_call_get_group(call);
1416 
1417 	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
1418 		panic("(%p %p) thread_call_cancel_wait: can't wait on thread call whose storage I don't own",
1419 		    call, call->tc_func);
1420 	}
1421 
1422 	if (!ml_get_interrupts_enabled()) {
1423 		panic("(%p %p) unsafe thread_call_cancel_wait",
1424 		    call, call->tc_func);
1425 	}
1426 
1427 	thread_t self = current_thread();
1428 
1429 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
1430 	    self->thc_state && self->thc_state->thc_call == call) {
1431 		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1432 		    call, call->tc_func);
1433 	}
1434 
1435 	spl_t s = disable_ints_and_lock(group);
1436 
1437 	boolean_t canceled = thread_call_cancel_locked(call);
1438 
1439 	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1440 		/*
1441 		 * A cancel-wait on a 'once' call will both cancel
1442 		 * the pending call and wait for the in-flight call
1443 		 */
1444 
1445 		thread_call_wait_once_locked(call, s);
1446 		/* thread call lock unlocked */
1447 	} else {
1448 		/*
1449 		 * A cancel-wait on a normal call will only wait for the in-flight calls
1450 		 * if it did not cancel the pending call.
1451 		 *
1452 		 * TODO: This seems less than useful - shouldn't it do the wait as well?
1453 		 */
1454 
1455 		if (canceled == FALSE) {
1456 			thread_call_wait_locked(call, s);
1457 			/* thread call lock unlocked */
1458 		} else {
1459 			enable_ints_and_unlock(group, s);
1460 		}
1461 	}
1462 
1463 	return canceled;
1464 }
1465 
1466 
1467 /*
1468  *	thread_call_wake:
1469  *
1470  *	Wake a call thread to service
1471  *	pending call entries.  May wake
1472  *	the daemon thread in order to
1473  *	create additional call threads.
1474  *
1475  *	Called with thread_call_lock held.
1476  *
1477  *	For high-priority group, only does wakeup/creation if there are no threads
1478  *	running.
1479  */
1480 static void
thread_call_wake(thread_call_group_t group)1481 thread_call_wake(
1482 	thread_call_group_t             group)
1483 {
1484 	/*
1485 	 * New behavior: use threads if you've got 'em.
1486 	 * Traditional behavior: wake only if no threads running.
1487 	 */
1488 	if (group_isparallel(group) || group->active_count == 0) {
1489 		if (group->idle_count) {
1490 			__assert_only kern_return_t kr;
1491 
1492 			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
1493 			    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
1494 			assert(kr == KERN_SUCCESS);
1495 
1496 			group->idle_count--;
1497 			group->active_count++;
1498 
1499 			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
1500 				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
1501 					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
1502 				}
1503 			}
1504 		} else {
1505 			if (thread_call_group_should_add_thread(group) &&
1506 			    os_atomic_cmpxchg(&thread_call_daemon_awake,
1507 			    false, true, relaxed)) {
1508 				waitq_wakeup64_all(&daemon_waitq,
1509 				    CAST_EVENT64_T(&thread_call_daemon_awake),
1510 				    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
1511 			}
1512 		}
1513 	}
1514 }
1515 
1516 /*
1517  *	sched_call_thread:
1518  *
1519  *	Call out invoked by the scheduler.
1520  */
1521 static void
sched_call_thread(int type,thread_t thread)1522 sched_call_thread(
1523 	int                             type,
1524 	thread_t                thread)
1525 {
1526 	thread_call_group_t             group;
1527 
1528 	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
1529 	assert(thread->thc_state != NULL);
1530 
1531 	group = thread->thc_state->thc_group;
1532 	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1533 	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);
1534 
1535 	thread_call_lock_spin(group);
1536 
1537 	switch (type) {
1538 	case SCHED_CALL_BLOCK:
1539 		assert(group->active_count);
1540 		--group->active_count;
1541 		group->blocked_count++;
1542 		if (group->pending_count > 0) {
1543 			thread_call_wake(group);
1544 		}
1545 		break;
1546 
1547 	case SCHED_CALL_UNBLOCK:
1548 		assert(group->blocked_count);
1549 		--group->blocked_count;
1550 		group->active_count++;
1551 		break;
1552 	}
1553 
1554 	thread_call_unlock(group);
1555 }
1556 
1557 /*
1558  * Interrupts disabled, lock held; returns the same way.
1559  * Only called on thread calls whose storage we own.  Wakes up
1560  * anyone who might be waiting on this work item and frees it
1561  * if the client has so requested.
1562  */
1563 static bool
thread_call_finish(thread_call_t call,thread_call_group_t group,spl_t * s)1564 thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1565 {
1566 	thread_call_group_t call_group = thread_call_get_group(call);
1567 	if (group != call_group) {
1568 		panic("(%p %p) call finishing from wrong group: %p",
1569 		    call, call->tc_func, call_group);
1570 	}
1571 
1572 	bool repend = false;
1573 	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
1574 	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;
1575 
1576 	call->tc_finish_count++;
1577 
1578 	if (!signal && alloc) {
1579 		/* The thread call thread owns a ref until the call is finished */
1580 		if (call->tc_refs <= 0) {
1581 			panic("(%p %p) thread_call_finish: detected over-released thread call",
1582 			    call, call->tc_func);
1583 		}
1584 		call->tc_refs--;
1585 	}
1586 
1587 	thread_call_flags_t old_flags = call->tc_flags;
1588 	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1589 
1590 	if ((!alloc || call->tc_refs != 0) &&
1591 	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
1592 		assert(old_flags & THREAD_CALL_ONCE);
1593 		thread_call_flavor_t flavor = thread_call_get_flavor(call);
1594 
1595 		if (old_flags & THREAD_CALL_DELAYED) {
1596 			uint64_t now = mach_absolute_time();
1597 			if (flavor == TCF_CONTINUOUS) {
1598 				now = absolutetime_to_continuoustime(now);
1599 			}
1600 			if (call->tc_soft_deadline <= now) {
1601 				/* The deadline has already expired, go straight to pending */
1602 				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
1603 				call->tc_pqlink.deadline = 0;
1604 			}
1605 		}
1606 
1607 		if (call->tc_pqlink.deadline) {
1608 			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
1609 			if (!signal) {
1610 				_arm_delayed_call_timer(call, group, flavor);
1611 			}
1612 		} else if (signal) {
1613 			call->tc_submit_count++;
1614 			repend = true;
1615 		} else {
1616 			_pending_call_enqueue(call, group, mach_absolute_time());
1617 		}
1618 	}
1619 
1620 	if (!signal && alloc && call->tc_refs == 0) {
1621 		if ((old_flags & THREAD_CALL_WAIT) != 0) {
1622 			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
1623 			    call, call->tc_func);
1624 		}
1625 
1626 		if (call->tc_finish_count != call->tc_submit_count) {
1627 			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
1628 			    call, call->tc_func,
1629 			    call->tc_submit_count, call->tc_finish_count);
1630 		}
1631 
1632 		if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
1633 			panic("(%p %p) uninitialized thread call", call, call->tc_func);
1634 		}
1635 
1636 		call->tc_flags &= ~THREAD_CALL_INITIALIZED;
1637 
1638 		enable_ints_and_unlock(group, *s);
1639 
1640 		zfree(thread_call_zone, call);
1641 
1642 		*s = disable_ints_and_lock(group);
1643 	}
1644 
1645 	if ((old_flags & THREAD_CALL_WAIT) != 0) {
1646 		/*
1647 		 * This may wake up a thread with a registered sched_call.
1648 		 * That call might need the group lock, so we drop the lock
1649 		 * to avoid deadlocking.
1650 		 *
1651 		 * We also must use a separate waitq from the idle waitq, as
1652 		 * this path goes waitq lock->thread lock->group lock, but
1653 		 * the idle wait goes group lock->waitq_lock->thread_lock.
1654 		 */
1655 		thread_call_unlock(group);
1656 
1657 		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
1658 		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
1659 
1660 		thread_call_lock_spin(group);
1661 		/* THREAD_CALL_SIGNAL call may have been freed */
1662 	}
1663 
1664 	return repend;
1665 }
1666 
1667 /*
1668  * thread_call_invoke
1669  *
1670  * Invoke the function provided for this thread call
1671  *
1672  * Note that the thread call object can be deallocated by the function if we do not control its storage.
1673  */
1674 static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,thread_call_param_t param0,thread_call_param_t param1,__unused thread_call_t call)1675 thread_call_invoke(thread_call_func_t func,
1676     thread_call_param_t param0,
1677     thread_call_param_t param1,
1678     __unused thread_call_t call)
1679 {
1680 #if DEVELOPMENT || DEBUG
1681 	KERNEL_DEBUG_CONSTANT(
1682 		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
1683 		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1684 #endif /* DEVELOPMENT || DEBUG */
1685 
1686 #if CONFIG_DTRACE
1687 	uint64_t tc_ttd = call->tc_ttd;
1688 	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1689 	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1690 	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1691 #endif
1692 
1693 	(*func)(param0, param1);
1694 
1695 #if CONFIG_DTRACE
1696 	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1697 	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1698 #endif
1699 
1700 #if DEVELOPMENT || DEBUG
1701 	KERNEL_DEBUG_CONSTANT(
1702 		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
1703 		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1704 #endif /* DEVELOPMENT || DEBUG */
1705 }
1706 
1707 /*
1708  *	thread_call_thread:
1709  */
1710 static void
thread_call_thread(thread_call_group_t group,wait_result_t wres)1711 thread_call_thread(
1712 	thread_call_group_t             group,
1713 	wait_result_t                   wres)
1714 {
1715 	thread_t self = current_thread();
1716 
1717 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1718 		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1719 	}
1720 
1721 	/*
1722 	 * A wakeup with THREAD_INTERRUPTED indicates that
1723 	 * we should terminate.
1724 	 */
1725 	if (wres == THREAD_INTERRUPTED) {
1726 		thread_terminate(self);
1727 
1728 		/* NOTREACHED */
1729 		panic("thread_terminate() returned?");
1730 	}
1731 
1732 	spl_t s = disable_ints_and_lock(group);
1733 
1734 	struct thread_call_thread_state thc_state = { .thc_group = group };
1735 	self->thc_state = &thc_state;
1736 
1737 	thread_sched_call(self, sched_call_thread);
1738 
1739 	while (group->pending_count > 0) {
1740 		thread_call_t call = qe_dequeue_head(&group->pending_queue,
1741 		    struct thread_call, tc_qlink);
1742 		assert(call != NULL);
1743 
1744 		/*
1745 		 * This thread_call_get_group is also here to validate
1746 		 * sanity of the thing popped off the queue
1747 		 */
1748 		thread_call_group_t call_group = thread_call_get_group(call);
1749 		if (group != call_group) {
1750 			panic("(%p %p) call on pending_queue from wrong group %p",
1751 			    call, call->tc_func, call_group);
1752 		}
1753 
1754 		group->pending_count--;
1755 		if (group->pending_count == 0) {
1756 			assert(queue_empty(&group->pending_queue));
1757 		}
1758 
1759 		thread_call_func_t  func   = call->tc_func;
1760 		thread_call_param_t param0 = call->tc_param0;
1761 		thread_call_param_t param1 = call->tc_param1;
1762 
1763 		if (func == NULL) {
1764 			panic("pending call with NULL func: %p", call);
1765 		}
1766 
1767 		call->tc_queue = NULL;
1768 
1769 		if (_is_internal_call(call)) {
1770 			_internal_call_release(call);
1771 		}
1772 
1773 		/*
1774 		 * Can only do wakeups for thread calls whose storage
1775 		 * we control.
1776 		 */
1777 		bool needs_finish = false;
1778 		if (call->tc_flags & THREAD_CALL_ALLOC) {
1779 			call->tc_refs++;        /* Delay free until we're done */
1780 		}
1781 		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
1782 			/*
1783 			 * If THREAD_CALL_ONCE is used, and the timer wasn't
1784 			 * THREAD_CALL_ALLOC, then clients swear they will use
1785 			 * thread_call_cancel_wait() before destroying
1786 			 * the thread call.
1787 			 *
1788 			 * Else, the storage for the thread call might have
1789 			 * disappeared when thread_call_invoke() ran.
1790 			 */
1791 			needs_finish = true;
1792 			call->tc_flags |= THREAD_CALL_RUNNING;
1793 		}
1794 
1795 		thc_state.thc_call = call;
1796 		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
1797 		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
1798 		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
1799 		thc_state.thc_func = func;
1800 		thc_state.thc_param0 = param0;
1801 		thc_state.thc_param1 = param1;
1802 		thc_state.thc_IOTES_invocation_timestamp = 0;
1803 
1804 		enable_ints_and_unlock(group, s);
1805 
1806 		thc_state.thc_call_start = mach_absolute_time();
1807 
1808 		thread_call_invoke(func, param0, param1, call);
1809 
1810 		thc_state.thc_call = NULL;
1811 
1812 		if (get_preemption_level() != 0) {
1813 			int pl = get_preemption_level();
1814 			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1815 			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1816 		}
1817 
1818 		s = disable_ints_and_lock(group);
1819 
1820 		if (needs_finish) {
1821 			/* Release refcount, may free, may temporarily drop lock */
1822 			thread_call_finish(call, group, &s);
1823 		}
1824 	}
1825 
1826 	thread_sched_call(self, NULL);
1827 	group->active_count--;
1828 
1829 	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1830 		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1831 		if (self->callout_woken_from_platform_idle) {
1832 			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1833 		}
1834 	}
1835 
1836 	self->callout_woken_from_icontext = FALSE;
1837 	self->callout_woken_from_platform_idle = FALSE;
1838 	self->callout_woke_thread = FALSE;
1839 
1840 	self->thc_state = NULL;
1841 
1842 	if (group_isparallel(group)) {
1843 		/*
1844 		 * For new style of thread group, thread always blocks.
1845 		 * If we have more than the target number of threads,
1846 		 * and this is the first to block, and it isn't active
1847 		 * already, set a timer for deallocating a thread if we
1848 		 * continue to have a surplus.
1849 		 */
1850 		group->idle_count++;
1851 
1852 		if (group->idle_count == 1) {
1853 			group->idle_timestamp = mach_absolute_time();
1854 		}
1855 
1856 		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
1857 		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
1858 			thread_call_start_deallocate_timer(group);
1859 		}
1860 
1861 		/* Wait for more work (or termination) */
1862 		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
1863 		if (wres != THREAD_WAITING) {
1864 			panic("kcall worker unable to assert wait %d", wres);
1865 		}
1866 
1867 		enable_ints_and_unlock(group, s);
1868 
1869 		thread_block_parameter((thread_continue_t)thread_call_thread, group);
1870 	} else {
1871 		if (group->idle_count < group->target_thread_count) {
1872 			group->idle_count++;
1873 
1874 			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */
1875 
1876 			enable_ints_and_unlock(group, s);
1877 
1878 			thread_block_parameter((thread_continue_t)thread_call_thread, group);
1879 			/* NOTREACHED */
1880 		}
1881 	}
1882 
1883 	enable_ints_and_unlock(group, s);
1884 
1885 	thread_terminate(self);
1886 	/* NOTREACHED */
1887 }
1888 
1889 void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)1890 thread_call_start_iotes_invocation(__assert_only thread_call_t call)
1891 {
1892 	thread_t self = current_thread();
1893 
1894 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1895 		/* not a thread call thread, might be a workloop IOTES */
1896 		return;
1897 	}
1898 
1899 	assert(self->thc_state);
1900 	assert(self->thc_state->thc_call == call);
1901 
1902 	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
1903 }
1904 
1905 
1906 /*
1907  *	thread_call_daemon: walk list of groups, allocating
1908  *	threads if appropriate (as determined by
1909  *	thread_call_group_should_add_thread()).
1910  */
1911 static void
thread_call_daemon_continue(__unused void * arg,__unused wait_result_t w)1912 thread_call_daemon_continue(__unused void *arg,
1913     __unused wait_result_t w)
1914 {
1915 	do {
1916 		os_atomic_store(&thread_call_daemon_awake, false, relaxed);
1917 
1918 		for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
1919 			thread_call_group_t group = &thread_call_groups[i];
1920 
1921 			spl_t s = disable_ints_and_lock(group);
1922 
1923 			while (thread_call_group_should_add_thread(group)) {
1924 				group->active_count++;
1925 
1926 				enable_ints_and_unlock(group, s);
1927 
1928 				thread_call_thread_create(group);
1929 
1930 				s = disable_ints_and_lock(group);
1931 			}
1932 
1933 			enable_ints_and_unlock(group, s);
1934 		}
1935 	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));
1936 
1937 	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);
1938 
1939 	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
1940 		clear_wait(current_thread(), THREAD_AWAKENED);
1941 	}
1942 
1943 	thread_block_parameter(thread_call_daemon_continue, NULL);
1944 	/* NOTREACHED */
1945 }
1946 
1947 static void
thread_call_daemon(__unused void * arg,__unused wait_result_t w)1948 thread_call_daemon(
1949 	__unused void    *arg,
1950 	__unused wait_result_t w)
1951 {
1952 	thread_t        self = current_thread();
1953 
1954 	self->options |= TH_OPT_VMPRIV;
1955 	vm_page_free_reserve(2);        /* XXX */
1956 
1957 	thread_set_thread_name(self, "thread_call_daemon");
1958 
1959 	thread_call_daemon_continue(NULL, 0);
1960 	/* NOTREACHED */
1961 }
1962 
1963 /*
1964  * Schedule timer to deallocate a worker thread if we have a surplus
1965  * of threads (in excess of the group's target) and at least one thread
1966  * is idle the whole time.
1967  */
1968 static void
thread_call_start_deallocate_timer(thread_call_group_t group)1969 thread_call_start_deallocate_timer(thread_call_group_t group)
1970 {
1971 	__assert_only bool already_enqueued;
1972 
1973 	assert(group->idle_count > 0);
1974 	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);
1975 
1976 	group->tcg_flags |= TCG_DEALLOC_ACTIVE;
1977 
1978 	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1979 
1980 	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);
1981 
1982 	assert(already_enqueued == false);
1983 }
1984 
1985 static inline uint64_t
thread_call_get_time(thread_call_flavor_t flavor)1986 thread_call_get_time(thread_call_flavor_t flavor)
1987 {
1988 	if (flavor == TCF_CONTINUOUS) {
1989 		return mach_continuous_time();
1990 	} else if (flavor == TCF_ABSOLUTE) {
1991 		return mach_absolute_time();
1992 	} else {
1993 		panic("invalid timer flavor: %d", flavor);
1994 	}
1995 }
1996 
1997 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
1998 void
thread_call_delayed_timer(timer_call_param_t p0,timer_call_param_t p1)1999 thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
2000 {
2001 	thread_call_group_t  group  = (thread_call_group_t)  p0;
2002 	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
2003 
2004 	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
2005 	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);
2006 
2007 	thread_call_t   call;
2008 	uint64_t        now;
2009 	extern uint64_t timer_scan_limit_abs;
2010 
2011 	thread_call_lock_spin(group);
2012 
2013 	now = thread_call_get_time(flavor);
2014 	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
2015 	    struct thread_call, tc_pqlink)) != NULL) {
2016 		assert(thread_call_get_group(call) == group);
2017 		assert(thread_call_get_flavor(call) == flavor);
2018 
2019 		/*
2020 		 * if we hit a call that isn't yet ready to expire,
2021 		 * then we're done for now
2022 		 * TODO: The next timer in the list could have a larger leeway
2023 		 *       and therefore be ready to expire.
2024 		 */
2025 		if (call->tc_soft_deadline > now) {
2026 			break;
2027 		}
2028 
2029 		/*
2030 		 * Don't do too much work in one timer interrupt.
2031 		 */
2032 		if (thread_call_get_time(flavor) > (now + timer_scan_limit_abs)) {
2033 			break;
2034 		}
2035 
2036 		/*
2037 		 * If we hit a rate-limited timer, don't eagerly wake it up.
2038 		 * Wait until it reaches the end of the leeway window.
2039 		 *
2040 		 * TODO: What if the next timer is not rate-limited?
2041 		 *       Have a separate rate-limited queue to avoid this
2042 		 */
2043 		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
2044 		    (call->tc_pqlink.deadline > now) &&
2045 		    (ml_timer_forced_evaluation() == FALSE)) {
2046 			break;
2047 		}
2048 
2049 		if (THREAD_CALL_SIGNAL & call->tc_flags) {
2050 			__assert_only queue_head_t *old_queue;
2051 			old_queue = thread_call_dequeue(call);
2052 			assert(old_queue == &group->delayed_queues[flavor]);
2053 
2054 			do {
2055 				thread_call_func_t  func   = call->tc_func;
2056 				thread_call_param_t param0 = call->tc_param0;
2057 				thread_call_param_t param1 = call->tc_param1;
2058 
2059 				call->tc_flags |= THREAD_CALL_RUNNING;
2060 
2061 				thread_call_unlock(group);
2062 				thread_call_invoke(func, param0, param1, call);
2063 				thread_call_lock_spin(group);
2064 
2065 				/* finish may detect that the call has been re-pended */
2066 			} while (thread_call_finish(call, group, NULL));
2067 			/* call may have been freed by the finish */
2068 		} else {
2069 			_pending_call_enqueue(call, group, now);
2070 		}
2071 	}
2072 
2073 	_arm_delayed_call_timer(call, group, flavor);
2074 
2075 	thread_call_unlock(group);
2076 }
2077 
2078 static void
thread_call_delayed_timer_rescan(thread_call_group_t group,thread_call_flavor_t flavor)2079 thread_call_delayed_timer_rescan(thread_call_group_t group,
2080     thread_call_flavor_t flavor)
2081 {
2082 	thread_call_t call;
2083 	uint64_t now;
2084 
2085 	spl_t s = disable_ints_and_lock(group);
2086 
2087 	assert(ml_timer_forced_evaluation() == TRUE);
2088 
2089 	if (flavor == TCF_CONTINUOUS) {
2090 		now = mach_continuous_time();
2091 	} else {
2092 		now = mach_absolute_time();
2093 	}
2094 
2095 	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
2096 		if (call->tc_soft_deadline <= now) {
2097 			_pending_call_enqueue(call, group, now);
2098 		} else {
2099 			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
2100 			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
2101 			/*
2102 			 * On a latency quality-of-service level change,
2103 			 * re-sort potentially rate-limited callout. The platform
2104 			 * layer determines which timers require this.
2105 			 *
2106 			 * This trick works by updating the deadline value to
2107 			 * equal soft-deadline, effectively crushing away
2108 			 * timer coalescing slop values for any armed
2109 			 * timer in the queue.
2110 			 *
2111 			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
2112 			 * only have to crush coalescing for timers that need it.
2113 			 *
2114 			 * TODO: Keep a separate queue of timers above the re-sort
2115 			 * threshold, so we only have to look at those.
2116 			 */
2117 			if (timer_resort_threshold(skew)) {
2118 				_call_dequeue(call, group);
2119 				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
2120 			}
2121 		}
2122 	}
2123 
2124 	_arm_delayed_call_timer(NULL, group, flavor);
2125 
2126 	enable_ints_and_unlock(group, s);
2127 }
2128 
2129 void
thread_call_delayed_timer_rescan_all(void)2130 thread_call_delayed_timer_rescan_all(void)
2131 {
2132 	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2133 		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2134 			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2135 		}
2136 	}
2137 }
2138 
2139 /*
2140  * Timer callback to tell a thread to terminate if
2141  * we have an excess of threads and at least one has been
2142  * idle for a long time.
2143  */
2144 static void
thread_call_dealloc_timer(timer_call_param_t p0,__unused timer_call_param_t p1)2145 thread_call_dealloc_timer(
2146 	timer_call_param_t              p0,
2147 	__unused timer_call_param_t     p1)
2148 {
2149 	thread_call_group_t group = (thread_call_group_t)p0;
2150 	uint64_t now;
2151 	kern_return_t res;
2152 	bool terminated = false;
2153 
2154 	thread_call_lock_spin(group);
2155 
2156 	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);
2157 
2158 	now = mach_absolute_time();
2159 
2160 	if (group->idle_count > 0) {
2161 		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
2162 			terminated = true;
2163 			group->idle_count--;
2164 			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
2165 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT);
2166 			if (res != KERN_SUCCESS) {
2167 				panic("Unable to wake up idle thread for termination (%d)", res);
2168 			}
2169 		}
2170 	}
2171 
2172 	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
2173 
2174 	/*
2175 	 * If we still have an excess of threads, schedule another
2176 	 * invocation of this function.
2177 	 */
2178 	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
2179 		/*
2180 		 * If we killed someone just now, push out the
2181 		 * next deadline.
2182 		 */
2183 		if (terminated) {
2184 			group->idle_timestamp = now;
2185 		}
2186 
2187 		thread_call_start_deallocate_timer(group);
2188 	}
2189 
2190 	thread_call_unlock(group);
2191 }
2192 
2193 /*
2194  * Wait for the invocation of the thread call to complete
2195  * We know there's only one in flight because of the 'once' flag.
2196  *
2197  * If a subsequent invocation comes in before we wake up, that's OK
2198  *
2199  * TODO: Here is where we will add priority inheritance to the thread executing
2200  * the thread call in case it's lower priority than the current thread
2201  *      <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2202  *
2203  * Takes the thread call lock locked, returns unlocked
2204  *      This lets us avoid a spurious take/drop after waking up from thread_block
2205  *
2206  * This thread could be a thread call thread itself, blocking and therefore making a
2207  * sched_call upcall into the thread call subsystem, needing the group lock.
2208  * However, we're saved from deadlock because the 'block' upcall is made in
2209  * thread_block, not in assert_wait.
2210  */
2211 static bool
thread_call_wait_once_locked(thread_call_t call,spl_t s)2212 thread_call_wait_once_locked(thread_call_t call, spl_t s)
2213 {
2214 	assert(call->tc_flags & THREAD_CALL_ALLOC);
2215 	assert(call->tc_flags & THREAD_CALL_ONCE);
2216 
2217 	thread_call_group_t group = thread_call_get_group(call);
2218 
2219 	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
2220 		enable_ints_and_unlock(group, s);
2221 		return false;
2222 	}
2223 
2224 	/* call is running, so we have to wait for it */
2225 	call->tc_flags |= THREAD_CALL_WAIT;
2226 
2227 	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
2228 	if (res != THREAD_WAITING) {
2229 		panic("Unable to assert wait: %d", res);
2230 	}
2231 
2232 	enable_ints_and_unlock(group, s);
2233 
2234 	res = thread_block(THREAD_CONTINUE_NULL);
2235 	if (res != THREAD_AWAKENED) {
2236 		panic("Awoken with %d?", res);
2237 	}
2238 
2239 	/* returns unlocked */
2240 	return true;
2241 }
2242 
2243 /*
2244  * Wait for an in-flight invocation to complete
2245  * Does NOT try to cancel, so the client doesn't need to hold their
2246  * lock while calling this function.
2247  *
2248  * Returns whether or not it had to wait.
2249  *
2250  * Only works for THREAD_CALL_ONCE calls.
2251  */
2252 boolean_t
thread_call_wait_once(thread_call_t call)2253 thread_call_wait_once(thread_call_t call)
2254 {
2255 	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2256 		panic("(%p %p) thread_call_wait_once: can't wait on thread call whose storage I don't own",
2257 		    call, call->tc_func);
2258 	}
2259 
2260 	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2261 		panic("(%p %p) thread_call_wait_once: can't wait_once on a non-once call",
2262 		    call, call->tc_func);
2263 	}
2264 
2265 	if (!ml_get_interrupts_enabled()) {
2266 		panic("(%p %p) unsafe thread_call_wait_once",
2267 		    call, call->tc_func);
2268 	}
2269 
2270 	thread_t self = current_thread();
2271 
2272 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2273 	    self->thc_state && self->thc_state->thc_call == call) {
2274 		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2275 		    call, call->tc_func);
2276 	}
2277 
2278 	thread_call_group_t group = thread_call_get_group(call);
2279 
2280 	spl_t s = disable_ints_and_lock(group);
2281 
2282 	bool waited = thread_call_wait_once_locked(call, s);
2283 	/* thread call lock unlocked */
2284 
2285 	return waited;
2286 }
2287 
2288 
2289 /*
2290  * Wait for all requested invocations of a thread call prior to now
2291  * to finish.  Can only be invoked on thread calls whose storage we manage.
2292  * Just waits for the finish count to catch up to the submit count we find
2293  * at the beginning of our wait.
2294  *
2295  * Called with thread_call_lock held.  Returns with lock released.
2296  */
2297 static void
thread_call_wait_locked(thread_call_t call,spl_t s)2298 thread_call_wait_locked(thread_call_t call, spl_t s)
2299 {
2300 	thread_call_group_t group = thread_call_get_group(call);
2301 
2302 	assert(call->tc_flags & THREAD_CALL_ALLOC);
2303 
2304 	uint64_t submit_count = call->tc_submit_count;
2305 
2306 	while (call->tc_finish_count < submit_count) {
2307 		call->tc_flags |= THREAD_CALL_WAIT;
2308 
2309 		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
2310 		    CAST_EVENT64_T(call), THREAD_UNINT, 0);
2311 
2312 		if (res != THREAD_WAITING) {
2313 			panic("Unable to assert wait: %d", res);
2314 		}
2315 
2316 		enable_ints_and_unlock(group, s);
2317 
2318 		res = thread_block(THREAD_CONTINUE_NULL);
2319 		if (res != THREAD_AWAKENED) {
2320 			panic("Awoken with %d?", res);
2321 		}
2322 
2323 		s = disable_ints_and_lock(group);
2324 	}
2325 
2326 	enable_ints_and_unlock(group, s);
2327 }
2328 
2329 /*
2330  * Determine whether a thread call is either on a queue or
2331  * currently being executed.
2332  */
2333 boolean_t
thread_call_isactive(thread_call_t call)2334 thread_call_isactive(thread_call_t call)
2335 {
2336 	thread_call_group_t group = thread_call_get_group(call);
2337 
2338 	spl_t s = disable_ints_and_lock(group);
2339 	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2340 	enable_ints_and_unlock(group, s);
2341 
2342 	return active;
2343 }
2344 
2345 /*
2346  * adjust_cont_time_thread_calls
2347  * on wake, reenqueue delayed call timer for continuous time thread call groups
2348  */
2349 void
adjust_cont_time_thread_calls(void)2350 adjust_cont_time_thread_calls(void)
2351 {
2352 	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2353 		thread_call_group_t group = &thread_call_groups[i];
2354 		spl_t s = disable_ints_and_lock(group);
2355 
2356 		/* only the continuous timers need to be re-armed */
2357 
2358 		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2359 		enable_ints_and_unlock(group, s);
2360 	}
2361 }
2362