xref: /xnu-8792.61.2/osfmk/kern/thread_call.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41 
42 #include <vm/vm_pageout.h>
43 
44 #include <kern/thread_call.h>
45 #include <kern/timer_call.h>
46 
47 #include <libkern/OSAtomic.h>
48 #include <kern/timer_queue.h>
49 
50 #include <sys/kdebug.h>
51 #if CONFIG_DTRACE
52 #include <mach/sdt.h>
53 #endif
54 #include <machine/machine_routines.h>
55 
56 static KALLOC_TYPE_DEFINE(thread_call_zone, thread_call_data_t, KT_PRIV_ACCT);
57 
/*
 * Flavor of a delayed thread call: which timebase its deadline is in.
 * TCF_ABSOLUTE deadlines are in mach_absolute_time units; TCF_CONTINUOUS
 * deadlines are in mach_continuous_time units and are converted when the
 * hardware timer is armed (see _arm_delayed_call_timer).  TCF_COUNT sizes
 * the per-flavor arrays in struct thread_call_group.
 */
typedef enum {
	TCF_ABSOLUTE    = 0,
	TCF_CONTINUOUS  = 1,
	TCF_COUNT       = 2,
} thread_call_flavor_t;

/* Per-group behavior flags. */
__options_decl(thread_call_group_flags_t, uint32_t, {
	TCG_NONE                = 0x0,
	TCG_PARALLEL            = 0x1, /* group may run multiple calls concurrently */
	TCG_DEALLOC_ACTIVE      = 0x2, /* NOTE(review): presumably set while dealloc_timer is armed — usage not visible in this chunk */
});
69 
/*
 * A thread call group: a pool of worker threads plus the pending and
 * delayed work queued for them.  One statically-allocated group exists per
 * THREAD_CALL_INDEX_*, each with its own worker priority and threading
 * policy (serial vs. TCG_PARALLEL).
 */
static struct thread_call_group {
	/* 128-byte aligned so each group's ticket lock sits on its own cache line */
	__attribute__((aligned(128))) lck_ticket_t tcg_lock;

	const char *            tcg_name;

	queue_head_t            pending_queue;  /* calls ready to execute now */
	uint32_t                pending_count;

	/* per-flavor delayed work: FIFO queue, deadline-ordered pqueue, and firing timer */
	queue_head_t            delayed_queues[TCF_COUNT];
	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
	timer_call_data_t       delayed_timers[TCF_COUNT];

	timer_call_data_t       dealloc_timer;  /* drives thread_call_dealloc_timer for this group */

	struct waitq            idle_waitq;     /* idle workers park here; LIFO order (see setup) */
	uint64_t                idle_timestamp;
	uint32_t                idle_count, active_count, blocked_count;

	uint32_t                tcg_thread_pri; /* scheduler priority of this group's workers */
	uint32_t                target_thread_count;

	thread_call_group_flags_t tcg_flags;

	struct waitq            waiters_waitq;  /* threads blocked in thread_call_wait_*() */
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
	[THREAD_CALL_INDEX_INVALID] = {
		.tcg_name               = "invalid",
	},
	[THREAD_CALL_INDEX_HIGH] = {
		.tcg_name               = "high",
		.tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
		.target_thread_count    = 4,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_KERNEL] = {
		.tcg_name               = "kernel",
		.tcg_thread_pri         = BASEPRI_KERNEL,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_USER] = {
		.tcg_name               = "user",
		.tcg_thread_pri         = BASEPRI_DEFAULT,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_LOW] = {
		.tcg_name               = "low",
		.tcg_thread_pri         = MAXPRI_THROTTLE,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
		.tcg_name               = "kernel-high",
		.tcg_thread_pri         = BASEPRI_PREEMPT,
		.target_thread_count    = 2,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UI] = {
		.tcg_name               = "qos-ui",
		.tcg_thread_pri         = BASEPRI_FOREGROUND,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_IN] = {
		.tcg_name               = "qos-in",
		.tcg_thread_pri         = BASEPRI_USER_INITIATED,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UT] = {
		.tcg_name               = "qos-ut",
		.tcg_thread_pri         = BASEPRI_UTILITY,
		.target_thread_count    = 1,
		.tcg_flags              = TCG_NONE,
	},
};
147 
typedef struct thread_call_group        *thread_call_group_t;

#define INTERNAL_CALL_COUNT             768                 /* size of the static pool backing func-only thread calls */
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO           4                   /* grow pool when pending > ratio * active threads */
#define THREAD_CALL_MACH_FACTOR_CAP     3                   /* ...but only while sched_mach_factor is below this */
#define THREAD_CALL_GROUP_MAX_THREADS   500                 /* hard cap; exceeding it panics (runaway detection) */
155 
/*
 * Per-worker-thread record of the call it is currently (or most recently)
 * executing, kept for scheduling decisions and debugging.
 */
struct thread_call_thread_state {
	struct thread_call_group * thc_group;
	struct thread_call *       thc_call;    /* debug only, may be deallocated */
	uint64_t thc_call_start;
	uint64_t thc_call_soft_deadline;
	uint64_t thc_call_hard_deadline;
	uint64_t thc_call_pending_timestamp;
	uint64_t thc_IOTES_invocation_timestamp; /* NOTE(review): presumably IOTimerEventSource bookkeeping — consumer not visible in this chunk */
	thread_call_func_t  thc_func;
	thread_call_param_t thc_param0;
	thread_call_param_t thc_param1;
};
168 
static bool                     thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq             daemon_waitq;

/* Static pool of "internal" (function-only) thread calls and its free list. */
static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t             thread_call_internal_queue;
int                                             thread_call_internal_queue_count = 0;
static uint64_t                 thread_call_dealloc_interval_abs;

/* Forward declarations for internal helpers. */
static void                     _internal_call_init(void);

static thread_call_t            _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool                     _is_internal_call(thread_call_t call);
static void                     _internal_call_release(thread_call_t call);
static bool                     _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool                     _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool                     _call_dequeue(thread_call_t call, thread_call_group_t group);
static void                     thread_call_wake(thread_call_group_t group);
static void                     thread_call_daemon(void *arg, wait_result_t w);
static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void                     thread_call_group_setup(thread_call_group_t group);
static void                     sched_call_thread(int type, thread_t thread);
static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
static void                     thread_call_wait_locked(thread_call_t call, spl_t s);
static bool                     thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
209 
210 
/* Take a group's ticket lock; callers disable interrupts first (see disable_ints_and_lock). */
static void
thread_call_lock_spin(thread_call_group_t group)
{
	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}
216 
/* Release a group's ticket lock. */
static void
thread_call_unlock(thread_call_group_t group)
{
	lck_ticket_unlock(&group->tcg_lock);
}
222 
/* Debug-only assertion that the current thread owns the group lock. */
static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
	lck_ticket_assert_owned(&group->tcg_lock);
}
228 
229 
/*
 * Raise spl to splsched (masking interrupts) and take the group lock.
 * Returns the previous spl level for the matching enable_ints_and_unlock().
 */
static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
	spl_t s = splsched();
	thread_call_lock_spin(group);

	return s;
}
238 
/* Drop the group lock, then restore the spl level saved by disable_ints_and_lock(). */
static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
	thread_call_unlock(group);
	splx(s);
}
245 
/*
 * Map a thread call to its owning group, validating the call on the way.
 *
 * Panics on an out-of-range index or an uninitialized call (NULL func or
 * THREAD_CALL_INITIALIZED not set).  For heap-allocated calls
 * (THREAD_CALL_ALLOC), kalloc_type_require() additionally verifies the
 * pointer really belongs to the thread_call zone, guarding against
 * type-confused or corrupted call pointers.
 */
static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
	thread_call_index_t index = call->tc_index;
	thread_call_flags_t flags = call->tc_flags;
	thread_call_func_t  func  = call->tc_func;

	if (index == THREAD_CALL_INDEX_INVALID || index >= THREAD_CALL_INDEX_MAX) {
		panic("(%p %p) invalid thread call index: %d", call, func, index);
	}

	if (func == NULL || !(flags & THREAD_CALL_INITIALIZED)) {
		panic("(%p %p) uninitialized thread call", call, func);
	}

	if (flags & THREAD_CALL_ALLOC) {
		kalloc_type_require(thread_call_data_t, call);
	}

	return &thread_call_groups[index];
}
267 
268 /* Lock held */
269 static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)270 thread_call_get_flavor(thread_call_t call)
271 {
272 	return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
273 }
274 
275 /* Lock held */
276 static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call,thread_call_flavor_t flavor)277 thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
278 {
279 	assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
280 	thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
281 
282 	if (old_flavor != flavor) {
283 		if (flavor == TCF_CONTINUOUS) {
284 			call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
285 		} else {
286 			call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
287 		}
288 	}
289 
290 	return old_flavor;
291 }
292 
/*
 * Append the call to the tail of new_queue, unlinking it from the delayed
 * queue it may currently be on.  Returns true if it was already on a queue.
 *
 * Only valid when the call is unqueued or on its current flavor's delayed
 * queue; unlike thread_call_dequeue(), finding it on the pending queue is
 * a panic here.  Called with the group lock held.
 */
static bool
thread_call_enqueue_tail(
	thread_call_t           call,
	queue_t                 new_queue)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p %p) on bad queue (old_queue: %p)",
		    call, call->tc_func, old_queue);
	}

	/* delayed queue and its priority queue are kept in sync: remove from both */
	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue != NULL;
}
324 
/*
 * Unlink the call from whichever group queue it is on (pending, or its
 * flavor's delayed queue — anything else panics), removing it from the
 * delayed priority queue as well when applicable.
 *
 * Returns the queue it was on, or NULL if it was not queued.  Does not
 * adjust group counters; callers such as _call_dequeue() do that.
 * Called with the group lock held.
 */
static queue_head_t *
thread_call_dequeue(
	thread_call_t            call)
{
	queue_t                 old_queue = call->tc_queue;

	thread_call_group_t     group = thread_call_get_group(call);
	thread_call_flavor_t    flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p %p) on bad queue (old_queue: %p)",
		    call, call->tc_func, old_queue);
	}

	/* keep the delayed priority queue in sync with the delayed FIFO queue */
	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue != NULL) {
		remqueue(&call->tc_qlink);

		call->tc_queue = NULL;
	}
	return old_queue;
}
352 
/*
 * Arm the call on the delayed queue of `flavor` with the given deadline,
 * switching the call's flavor flag if needed.
 *
 * If the call is already on that same delayed queue, only its position in
 * the deadline-ordered priority queue is rebalanced (cheaper than a full
 * remove + re-insert).  Otherwise it is pulled off its old priority queue
 * (if it was delayed) and inserted fresh.
 *
 * Returns the queue the call was previously on, or NULL if unqueued.
 * Called with the group lock held.
 */
static queue_head_t *
thread_call_enqueue_deadline(
	thread_call_t           call,
	thread_call_group_t     group,
	thread_call_flavor_t    flavor,
	uint64_t                deadline)
{
	queue_t old_queue = call->tc_queue;
	queue_t new_queue = &group->delayed_queues[flavor];

	/* note: flavor must be switched before comparing against old_flavor's queue */
	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[old_flavor]) {
		panic("thread call (%p %p) on bad queue (old_queue: %p)",
		    call, call->tc_func, old_queue);
	}

	if (old_queue == new_queue) {
		/* optimize the same-queue case to avoid a full re-insert */
		uint64_t old_deadline = call->tc_pqlink.deadline;
		call->tc_pqlink.deadline = deadline;

		/* tell the pqueue which direction the key moved so it can rebalance */
		if (old_deadline < deadline) {
			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		} else {
			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		}
	} else {
		if (old_queue == &group->delayed_queues[old_flavor]) {
			priority_queue_remove(&group->delayed_pqueues[old_flavor],
			    &call->tc_pqlink);
		}

		call->tc_pqlink.deadline = deadline;

		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else if (old_queue != new_queue) {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue;
}
405 
/* Read the deadline the call is currently armed with (its pqueue key). */
uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
	return call->tc_pqlink.deadline;
}
411 
412 
/* True if this group may run multiple thread calls concurrently (TCG_PARALLEL). */
static bool
group_isparallel(thread_call_group_t group)
{
	return (group->tcg_flags & TCG_PARALLEL) != 0;
}
418 
419 static bool
thread_call_group_should_add_thread(thread_call_group_t group)420 thread_call_group_should_add_thread(thread_call_group_t group)
421 {
422 	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
423 		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
424 		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
425 		    group->active_count, group->blocked_count, group->idle_count);
426 	}
427 
428 	if (group_isparallel(group) == false) {
429 		if (group->pending_count > 0 && group->active_count == 0) {
430 			return true;
431 		}
432 
433 		return false;
434 	}
435 
436 	if (group->pending_count > 0) {
437 		if (group->idle_count > 0) {
438 			return false;
439 		}
440 
441 		uint32_t thread_count = group->active_count;
442 
443 		/*
444 		 * Add a thread if either there are no threads,
445 		 * the group has fewer than its target number of
446 		 * threads, or the amount of work is large relative
447 		 * to the number of threads.  In the last case, pay attention
448 		 * to the total load on the system, and back off if
449 		 * it's high.
450 		 */
451 		if ((thread_count == 0) ||
452 		    (thread_count < group->target_thread_count) ||
453 		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
454 		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
455 			return true;
456 		}
457 	}
458 
459 	return false;
460 }
461 
/*
 * One-time initialization of a group: its lock, pending queue, per-flavor
 * delayed queues/pqueues/timers, dealloc timer, and waitqs.
 */
static void
thread_call_group_setup(thread_call_group_t group)
{
	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

	queue_init(&group->pending_queue);

	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
		queue_init(&group->delayed_queues[flavor]);
		priority_queue_init(&group->delayed_pqueues[flavor]);
		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
	}

	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

	waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
	waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
}
482 
/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 *
 * Starts a new worker running thread_call_thread() at the group's
 * priority; panics if the kernel cannot create the thread.
 */
static void
thread_call_thread_create(
	thread_call_group_t             group)
{
	thread_t thread;
	kern_return_t result;

	int thread_pri = group->tcg_thread_pri;

	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
	    group, thread_pri, &thread);
	if (result != KERN_SUCCESS) {
		panic("cannot create new thread call thread %d", result);
	}

	if (thread_pri <= BASEPRI_KERNEL) {
		/*
		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
		 * in kernel if there are higher priority threads available.
		 */
		thread_set_eager_preempt(thread);
	}

	char name[MAXTHREADNAMESIZE] = "";

	/* Name the thread with the instantaneous pool size at creation time. */
	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
	thread_set_thread_name(thread, name);

	/* Drop the reference from kernel_thread_start_priority; the thread runs on. */
	thread_deallocate(thread);
}
519 
/*
 *	thread_call_initialize:
 *
 *	Initialize this module, called
 *	early during system initialization.
 */
__startup_func
static void
thread_call_initialize(void)
{
	/* Convert the idle-thread reap interval from ns to absolute-time units. */
	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
	waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

	/* Set up every real group; THREAD_CALL_INDEX_INVALID is deliberately skipped. */
	for (uint32_t i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
		thread_call_group_setup(&thread_call_groups[i]);
	}

	_internal_call_init();

	thread_t thread;
	kern_return_t result;

	/* The daemon runs above the highest group's worker priority. */
	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
	if (result != KERN_SUCCESS) {
		panic("thread_call_initialize failed (%d)", result);
	}

	thread_deallocate(thread);
}
STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);
551 
/*
 * Initialize a caller-provided thread call structure.
 *
 * func/param0 are recorded as the call's action; pri selects the group
 * (execution priority); options may request one-shot (ONCE) and/or
 * signal-at-deadline (SIGNAL, which implies ONCE) semantics.
 * Panics on a NULL func or an unknown priority value.
 */
void
thread_call_setup_with_options(
	thread_call_t                   call,
	thread_call_func_t              func,
	thread_call_param_t             param0,
	thread_call_priority_t          pri,
	thread_call_options_t           options)
{
	if (func == NULL) {
		panic("initializing thread call with NULL func");
	}

	/* Clear everything (including padding) before re-initializing. */
	bzero(call, sizeof(*call));

	*call = (struct thread_call) {
		.tc_func = func,
		.tc_param0 = param0,
		.tc_flags = THREAD_CALL_INITIALIZED,
	};

	switch (pri) {
	case THREAD_CALL_PRIORITY_HIGH:
		call->tc_index = THREAD_CALL_INDEX_HIGH;
		break;
	case THREAD_CALL_PRIORITY_KERNEL:
		call->tc_index = THREAD_CALL_INDEX_KERNEL;
		break;
	case THREAD_CALL_PRIORITY_USER:
		call->tc_index = THREAD_CALL_INDEX_USER;
		break;
	case THREAD_CALL_PRIORITY_LOW:
		call->tc_index = THREAD_CALL_INDEX_LOW;
		break;
	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
		break;
	default:
		panic("Invalid thread call pri value: %d", pri);
		break;
	}

	if (options & THREAD_CALL_OPTIONS_ONCE) {
		call->tc_flags |= THREAD_CALL_ONCE;
	}
	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
		/* SIGNAL implies ONCE */
		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
	}
}
600 
/* Legacy initializer: defaults to HIGH priority with no options. */
void
thread_call_setup(
	thread_call_t                   call,
	thread_call_func_t              func,
	thread_call_param_t             param0)
{
	thread_call_setup_with_options(call, func, param0,
	    THREAD_CALL_PRIORITY_HIGH, 0);
}
610 
/*
 * Build the free list of internal (function-only) thread calls out of the
 * static internal_call_storage pool, under the HIGH group's lock.
 */
static void
_internal_call_init(void)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	queue_init(&thread_call_internal_queue);

	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
		thread_call_internal_queue_count++;
	}

	enable_ints_and_unlock(group, s);
}
628 
/*
 *	_internal_call_allocate:
 *
 *	Allocate an internal callout entry.
 *
 *	Pops an entry from the static free list (panics if the pool is
 *	exhausted) and initializes it with func/param0 at HIGH priority.
 *
 *	Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
	    struct thread_call, tc_qlink);

	if (call == NULL) {
		panic("_internal_call_allocate: thread_call_internal_queue empty");
	}

	thread_call_internal_queue_count--;

	thread_call_setup(call, func, param0);
	/* THREAD_CALL_ALLOC not set, do not free back to zone */
	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
	enable_ints_and_unlock(group, s);

	return call;
}
660 
661 /* Check if a call is internal and needs to be returned to the internal pool. */
662 static bool
_is_internal_call(thread_call_t call)663 _is_internal_call(thread_call_t call)
664 {
665 	if (call >= internal_call_storage &&
666 	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
667 		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
668 		return true;
669 	}
670 	return false;
671 }
672 
/*
 *	_internal_call_release:
 *
 *	Release an internal callout entry which
 *	is no longer pending (or delayed).
 *
 *	Clears THREAD_CALL_INITIALIZED and pushes the entry back on the
 *	head of the free list.
 *
 *      Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
	assert(_is_internal_call(call));

	thread_call_group_t group = thread_call_get_group(call);

	/* internal calls always live in the HIGH group */
	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
	thread_call_assert_locked(group);

	/* mark uninitialized so stale reuse trips thread_call_get_group's checks */
	call->tc_flags &= ~THREAD_CALL_INITIALIZED;

	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
	thread_call_internal_queue_count++;
}
696 
/*
 *	_pending_call_enqueue:
 *
 *	Place an entry at the end of the
 *	pending queue, to be executed soon.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
	/*
	 * A ONCE call that is currently RUNNING cannot be re-queued; instead
	 * mark it RESCHEDULE so the executing thread re-enqueues it when the
	 * invocation finishes.  Deadline 0 records that the re-run should be
	 * immediate (pending) rather than delayed.  The return value still
	 * answers "was it already scheduled?", i.e. whether RESCHEDULE was
	 * set before this request.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = 0;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);

		return flags & THREAD_CALL_RESCHEDULE;
	}

	call->tc_pending_timestamp = now;

	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

	if (!was_on_queue) {
		/* only a fresh enqueue counts as a new submission */
		call->tc_submit_count++;
	}

	group->pending_count++;

	/* kick an idle worker (or the daemon) to service the queue */
	thread_call_wake(group);

	return was_on_queue;
}
739 
/*
 *	_delayed_call_enqueue:
 *
 *	Place an entry on the delayed queue,
 *	after existing entries with an earlier
 *      (or identical) deadline.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
	thread_call_t           call,
	thread_call_group_t     group,
	uint64_t                deadline,
	thread_call_flavor_t    flavor)
{
	/*
	 * A ONCE call that is currently RUNNING cannot be re-queued; record
	 * the requested deadline and flavor on the call and set RESCHEDULE so
	 * the executing thread re-arms it when the invocation finishes.  The
	 * return value answers whether it was already marked for reschedule.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = deadline;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);
		thread_call_set_flavor(call, flavor);

		return flags & THREAD_CALL_RESCHEDULE;
	}

	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

	if (old_queue == &group->pending_queue) {
		/* moved from pending to delayed: it no longer counts as pending */
		group->pending_count--;
	} else if (old_queue == NULL) {
		call->tc_submit_count++;
	}

	return old_queue != NULL;
}
782 
783 /*
784  *	_call_dequeue:
785  *
786  *	Remove an entry from a queue.
787  *
788  *	Returns TRUE if the entry was on a queue.
789  *
790  *	Called with thread_call_lock held.
791  */
792 static bool
_call_dequeue(thread_call_t call,thread_call_group_t group)793 _call_dequeue(
794 	thread_call_t           call,
795 	thread_call_group_t     group)
796 {
797 	queue_head_t *old_queue = thread_call_dequeue(call);
798 
799 	if (old_queue == NULL) {
800 		return false;
801 	}
802 
803 	call->tc_finish_count++;
804 
805 	if (old_queue == &group->pending_queue) {
806 		group->pending_count--;
807 	}
808 
809 	return true;
810 }
811 
/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t           new_call,
    thread_call_group_t     group,
    thread_call_flavor_t    flavor)
{
	/* No calls implies no timer needed */
	if (queue_empty(&group->delayed_queues[flavor])) {
		return false;
	}

	/* soonest-deadline call for this flavor */
	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

	/* We only need to change the hard timer if this new call is the first in the list */
	if (new_call != NULL && new_call != call) {
		return false;
	}

	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

	/* fire at the soft deadline; the hard deadline bounds the leeway below */
	uint64_t fire_at = call->tc_soft_deadline;

	if (flavor == TCF_CONTINUOUS) {
		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
		/* timer_call works in absolute time; convert the continuous deadline */
		fire_at = continuoustime_to_absolutetime(fire_at);
	} else {
		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
	}

	/*
	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
	 * which does not take into account later-deadline timers with a larger leeway.
	 * This is a valid coalescing behavior, but masks a possible window to
	 * fire a timer instead of going idle.
	 */
	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

	/* flavor is smuggled through as the timer parameter for the handler */
	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
	    fire_at, leeway,
	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

	return true;
}
869 
/*
 *	_cancel_func_from_queue:
 *
 *	Remove the first (or all) matching
 *	entries from the specified queue.
 *
 *	A match requires both func AND param0 to be equal.  Matching
 *	internal (pool-backed) calls are returned to the free list.
 *
 *	Returns TRUE if any matching entries
 *	were found.
 *
 *	Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t      func,
    thread_call_param_t     param0,
    thread_call_group_t     group,
    boolean_t               remove_all,
    queue_head_t            *queue)
{
	boolean_t call_removed = FALSE;
	thread_call_t call;

	/* safe variant: the current element is unlinked inside the loop */
	qe_foreach_element_safe(call, queue, tc_qlink) {
		if (call->tc_func != func ||
		    call->tc_param0 != param0) {
			continue;
		}

		_call_dequeue(call, group);

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		call_removed = TRUE;
		if (!remove_all) {
			/* first match is enough */
			break;
		}
	}

	return call_removed;
}
911 
/*
 *	thread_call_func_delayed:
 *
 *	Enqueue a function callout to
 *	occur at the stated time.
 *
 *	Allocates an internal call under the hood (NULL call argument);
 *	no leeway, no flags.
 */
void
thread_call_func_delayed(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                        deadline)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}
926 
/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */

void
thread_call_func_delayed_with_leeway(
	thread_call_func_t              func,
	thread_call_param_t             param,
	uint64_t                deadline,
	uint64_t                leeway,
	uint32_t                flags)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}
944 
/*
 *	thread_call_func_cancel:
 *
 *	Dequeue a function callout.
 *
 *	Removes one (or all) { function, argument }
 *	instance(s) from either (or both)
 *	the pending and	the delayed queue,
 *	in that order.
 *
 *	Returns TRUE if any calls were cancelled.
 *
 *	This iterates all of the pending or delayed thread calls in the group,
 *	which is really inefficient.  Switch to an allocated thread call instead.
 *
 *	TODO: Give 'func' thread calls their own group, so this silliness doesn't
 *	affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
	thread_call_func_t              func,
	thread_call_param_t             param,
	boolean_t                       cancel_all)
{
	boolean_t       result;

	if (func == NULL) {
		panic("trying to cancel NULL func");
	}

	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	if (cancel_all) {
		/*
		 * Exhaustively search every queue, and return true if any search
		 * found something.  Note the deliberate bitwise '|' so all three
		 * searches run regardless of earlier results.
		 */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE])  |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	} else {
		/* early-exit as soon as we find something, don't search other queues */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	}

	enable_ints_and_unlock(group, s);

	return result;
}
996 
997 /*
998  * Allocate a thread call with a given priority.  Importances other than
999  * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
1000  * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
1001  * threads which are not in the normal "urgent" bands).
1002  */
1003 thread_call_t
thread_call_allocate_with_priority(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri)1004 thread_call_allocate_with_priority(
1005 	thread_call_func_t              func,
1006 	thread_call_param_t             param0,
1007 	thread_call_priority_t          pri)
1008 {
1009 	return thread_call_allocate_with_options(func, param0, pri, 0);
1010 }
1011 
1012 thread_call_t
thread_call_allocate_with_options(thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)1013 thread_call_allocate_with_options(
1014 	thread_call_func_t              func,
1015 	thread_call_param_t             param0,
1016 	thread_call_priority_t          pri,
1017 	thread_call_options_t           options)
1018 {
1019 	thread_call_t call = zalloc(thread_call_zone);
1020 
1021 	thread_call_setup_with_options(call, func, param0, pri, options);
1022 	call->tc_refs = 1;
1023 	call->tc_flags |= THREAD_CALL_ALLOC;
1024 
1025 	return call;
1026 }
1027 
1028 thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,thread_call_param_t param0,int qos_tier,thread_call_options_t options)1029 thread_call_allocate_with_qos(thread_call_func_t        func,
1030     thread_call_param_t       param0,
1031     int                       qos_tier,
1032     thread_call_options_t     options)
1033 {
1034 	thread_call_t call = thread_call_allocate(func, param0);
1035 
1036 	switch (qos_tier) {
1037 	case THREAD_QOS_UNSPECIFIED:
1038 		call->tc_index = THREAD_CALL_INDEX_HIGH;
1039 		break;
1040 	case THREAD_QOS_LEGACY:
1041 		call->tc_index = THREAD_CALL_INDEX_USER;
1042 		break;
1043 	case THREAD_QOS_MAINTENANCE:
1044 	case THREAD_QOS_BACKGROUND:
1045 		call->tc_index = THREAD_CALL_INDEX_LOW;
1046 		break;
1047 	case THREAD_QOS_UTILITY:
1048 		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
1049 		break;
1050 	case THREAD_QOS_USER_INITIATED:
1051 		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
1052 		break;
1053 	case THREAD_QOS_USER_INTERACTIVE:
1054 		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
1055 		break;
1056 	default:
1057 		panic("Invalid thread call qos value: %d", qos_tier);
1058 		break;
1059 	}
1060 
1061 	if (options & THREAD_CALL_OPTIONS_ONCE) {
1062 		call->tc_flags |= THREAD_CALL_ONCE;
1063 	}
1064 
1065 	/* does not support THREAD_CALL_OPTIONS_SIGNAL */
1066 
1067 	return call;
1068 }
1069 
1070 
1071 /*
1072  *	thread_call_allocate:
1073  *
1074  *	Allocate a callout entry.
1075  */
1076 thread_call_t
thread_call_allocate(thread_call_func_t func,thread_call_param_t param0)1077 thread_call_allocate(
1078 	thread_call_func_t              func,
1079 	thread_call_param_t             param0)
1080 {
1081 	return thread_call_allocate_with_options(func, param0,
1082 	           THREAD_CALL_PRIORITY_HIGH, 0);
1083 }
1084 
/*
 *	thread_call_free:
 *
 *	Release a callout.  If the callout is currently
 *	executing, it will be freed when all invocations
 *	finish.
 *
 *	If the callout is currently armed to fire again, then
 *	freeing is not allowed and returns FALSE.  The
 *	client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
	thread_call_t           call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * Refuse to free a call that is still enqueued or marked for
	 * re-scheduling; the client must cancel it first.
	 */
	if (call->tc_queue != NULL ||
	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
		thread_call_unlock(group);
		splx(s);

		return FALSE;
	}

	int32_t refs = --call->tc_refs;
	if (refs < 0) {
		panic("(%p %p) Refcount negative: %d", call, call->tc_func, refs);
	}

	/*
	 * A signal-style call that is still running must be waited for
	 * before the zone free below can be safe.
	 */
	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
		thread_call_wait_once_locked(call, s);
		/* thread call lock has been unlocked */
	} else {
		enable_ints_and_unlock(group, s);
	}

	if (refs == 0) {
		/* Last reference: sanity-check invariants, then return to zone. */
		if (!(call->tc_flags & THREAD_CALL_INITIALIZED)) {
			panic("(%p %p) freeing an uninitialized call", call, call->tc_func);
		}

		if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
			    call, call->tc_func);
		}

		if (call->tc_flags & THREAD_CALL_RUNNING) {
			panic("(%p %p) freeing a running once call", call, call->tc_func);
		}

		if (call->tc_finish_count != call->tc_submit_count) {
			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
			    call, call->tc_func,
			    call->tc_submit_count, call->tc_finish_count);
		}

		call->tc_flags &= ~THREAD_CALL_INITIALIZED;

		zfree(thread_call_zone, call);
	}

	return TRUE;
}
1152 
1153 /*
1154  *	thread_call_enter:
1155  *
1156  *	Enqueue a callout entry to occur "soon".
1157  *
1158  *	Returns TRUE if the call was
1159  *	already on a queue.
1160  */
1161 boolean_t
thread_call_enter(thread_call_t call)1162 thread_call_enter(
1163 	thread_call_t           call)
1164 {
1165 	return thread_call_enter1(call, 0);
1166 }
1167 
1168 boolean_t
thread_call_enter1(thread_call_t call,thread_call_param_t param1)1169 thread_call_enter1(
1170 	thread_call_t                   call,
1171 	thread_call_param_t             param1)
1172 {
1173 	if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
1174 		panic("(%p %p) uninitialized thread call", call, call->tc_func);
1175 	}
1176 
1177 	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
1178 
1179 	thread_call_group_t group = thread_call_get_group(call);
1180 	bool result = true;
1181 
1182 	spl_t s = disable_ints_and_lock(group);
1183 
1184 	if (call->tc_queue != &group->pending_queue) {
1185 		result = _pending_call_enqueue(call, group, mach_absolute_time());
1186 	}
1187 
1188 	call->tc_param1 = param1;
1189 
1190 	enable_ints_and_unlock(group, s);
1191 
1192 	return result;
1193 }
1194 
1195 /*
1196  *	thread_call_enter_delayed:
1197  *
1198  *	Enqueue a callout entry to occur
1199  *	at the stated time.
1200  *
1201  *	Returns TRUE if the call was
1202  *	already on a queue.
1203  */
1204 boolean_t
thread_call_enter_delayed(thread_call_t call,uint64_t deadline)1205 thread_call_enter_delayed(
1206 	thread_call_t           call,
1207 	uint64_t                deadline)
1208 {
1209 	if (call == NULL) {
1210 		panic("NULL call in %s", __FUNCTION__);
1211 	}
1212 	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
1213 }
1214 
1215 boolean_t
thread_call_enter1_delayed(thread_call_t call,thread_call_param_t param1,uint64_t deadline)1216 thread_call_enter1_delayed(
1217 	thread_call_t                   call,
1218 	thread_call_param_t             param1,
1219 	uint64_t                        deadline)
1220 {
1221 	if (call == NULL) {
1222 		panic("NULL call in %s", __FUNCTION__);
1223 	}
1224 
1225 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
1226 }
1227 
1228 boolean_t
thread_call_enter_delayed_with_leeway(thread_call_t call,thread_call_param_t param1,uint64_t deadline,uint64_t leeway,unsigned int flags)1229 thread_call_enter_delayed_with_leeway(
1230 	thread_call_t           call,
1231 	thread_call_param_t     param1,
1232 	uint64_t                deadline,
1233 	uint64_t                leeway,
1234 	unsigned int            flags)
1235 {
1236 	if (call == NULL) {
1237 		panic("NULL call in %s", __FUNCTION__);
1238 	}
1239 
1240 	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
1241 }
1242 
1243 
/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * params:
 * call     - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway   - timer slack represented as delta of deadline.
 * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 *                                                                        than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
	thread_call_t           call,
	thread_call_func_t      alt_func,
	thread_call_param_t     alt_param0,
	thread_call_param_t     param1,
	uint64_t                deadline,
	uint64_t                leeway,
	unsigned int            flags)
{
	uint64_t                now, sdeadline;

	/* Continuous-time calls live on a separate delayed queue flavor. */
	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * kevent and IOTES let you change flavor for an existing timer, so we have to
	 * support flipping flavors for enqueued thread calls.
	 */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	call->tc_flags |= THREAD_CALL_DELAYED;

	/* The soft deadline is the caller's requested fire time. */
	call->tc_soft_deadline = sdeadline = deadline;

	/* Ask the timer subsystem how much coalescing slack is permitted. */
	boolean_t ratelimited = FALSE;
	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

	/* Caller-supplied leeway only widens the window, never narrows it. */
	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	/* Hard deadline = soft deadline + slop, saturating at UINT64_MAX. */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (ratelimited) {
		call->tc_flags |= THREAD_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
	}

	call->tc_param1 = param1;

	/* Time-to-deadline, recorded for tracing below. */
	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

	bool result = _delayed_call_enqueue(call, group, deadline, flavor);

	/* Re-arm the group's delayed timer for the (possibly new) head. */
	_arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

	enable_ints_and_unlock(group, s);

	return result;
}
1336 
/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 *
 * Returns true if the call was armed (queued or pending reschedule)
 * and has now been disarmed.
 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
	bool canceled;

	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
		/*
		 * The call asked to be re-queued when its current invocation
		 * finishes; clearing that request counts as a cancellation.
		 */
		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
		canceled = true;

		/* if reschedule was set, it must not have been queued */
		assert(call->tc_queue == NULL);
	} else {
		bool queue_head_changed = false;

		thread_call_flavor_t flavor = thread_call_get_flavor(call);
		thread_call_group_t  group  = thread_call_get_group(call);

		/*
		 * If this call is the earliest delayed entry for its flavor,
		 * removing it changes the next fire time, so the backing
		 * timer must be re-armed (or cancelled) below.
		 */
		if (call->tc_pqlink.deadline != 0 &&
		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
			assert(call->tc_queue == &group->delayed_queues[flavor]);
			queue_head_changed = true;
		}

		canceled = _call_dequeue(call, group);

		if (queue_head_changed) {
			/* No remaining delayed calls => stop the group timer. */
			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
				timer_call_cancel(&group->delayed_timers[flavor]);
			}
		}
	}

#if CONFIG_DTRACE
	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

	return canceled;
}
1380 
1381 /*
1382  *	thread_call_cancel:
1383  *
1384  *	Dequeue a callout entry.
1385  *
1386  *	Returns TRUE if the call was
1387  *	on a queue.
1388  */
1389 boolean_t
thread_call_cancel(thread_call_t call)1390 thread_call_cancel(thread_call_t call)
1391 {
1392 	thread_call_group_t group = thread_call_get_group(call);
1393 
1394 	spl_t s = disable_ints_and_lock(group);
1395 
1396 	boolean_t result = thread_call_cancel_locked(call);
1397 
1398 	enable_ints_and_unlock(group, s);
1399 
1400 	return result;
1401 }
1402 
/*
 * Cancel a thread call.  If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish.  Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	/* Only calls whose storage we allocated are safe to wait on. */
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("(%p %p) thread_call_cancel_wait: can't wait on thread call whose storage I don't own",
		    call, call->tc_func);
	}

	/* A blocking wait is illegal with interrupts disabled. */
	if (!ml_get_interrupts_enabled()) {
		panic("(%p %p) unsafe thread_call_cancel_wait",
		    call, call->tc_func);
	}

	thread_t self = current_thread();

	/* Waiting on the very call we are executing would deadlock. */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	spl_t s = disable_ints_and_lock(group);

	boolean_t canceled = thread_call_cancel_locked(call);

	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
		/*
		 * A cancel-wait on a 'once' call will both cancel
		 * the pending call and wait for the in-flight call
		 */

		thread_call_wait_once_locked(call, s);
		/* thread call lock unlocked */
	} else {
		/*
		 * A cancel-wait on a normal call will only wait for the in-flight calls
		 * if it did not cancel the pending call.
		 *
		 * TODO: This seems less than useful - shouldn't it do the wait as well?
		 */

		if (canceled == FALSE) {
			thread_call_wait_locked(call, s);
			/* thread call lock unlocked */
		} else {
			enable_ints_and_unlock(group, s);
		}
	}

	return canceled;
}
1464 
1465 
/*
 *	thread_call_wake:
 *
 *	Wake a call thread to service
 *	pending call entries.  May wake
 *	the daemon thread in order to
 *	create additional call threads.
 *
 *	Called with thread_call_lock held.
 *
 *	For high-priority group, only does wakeup/creation if there are no threads
 *	running.
 */
static void
thread_call_wake(
	thread_call_group_t             group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (group->idle_count) {
			__assert_only kern_return_t kr;

			/* Wake exactly one idle worker for this group. */
			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
			assert(kr == KERN_SUCCESS);

			group->idle_count--;
			group->active_count++;

			/*
			 * The last idle thread just left, so a pending
			 * deallocation timer has nothing left to reap.
			 */
			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
				}
			}
		} else {
			/*
			 * No idle workers: ask the daemon to create one, but
			 * only issue the wakeup if we won the race to flip
			 * thread_call_daemon_awake from false to true.
			 */
			if (thread_call_group_should_add_thread(group) &&
			    os_atomic_cmpxchg(&thread_call_daemon_awake,
			    false, true, relaxed)) {
				waitq_wakeup64_all(&daemon_waitq,
				    CAST_EVENT64_T(&thread_call_daemon_awake),
				    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
			}
		}
	}
}
1514 
/*
 *	sched_call_thread:
 *
 *	Call out invoked by the scheduler.
 *
 *	Maintains the group's active/blocked worker counts as callout
 *	threads block and unblock, waking another worker when work is
 *	still pending.
 */
static void
sched_call_thread(
	int                             type,
	thread_t                thread)
{
	thread_call_group_t             group;

	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
	assert(thread->thc_state != NULL);

	group = thread->thc_state->thc_group;
	/* Validate the group pointer lies within the static groups array. */
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);

	thread_call_lock_spin(group);

	switch (type) {
	case SCHED_CALL_BLOCK:
		/* A worker blocked: it no longer counts as active. */
		assert(group->active_count);
		--group->active_count;
		group->blocked_count++;
		/* If work is still waiting, wake (or create) another worker. */
		if (group->pending_count > 0) {
			thread_call_wake(group);
		}
		break;

	case SCHED_CALL_UNBLOCK:
		/* A worker resumed: move it back to the active count. */
		assert(group->blocked_count);
		--group->blocked_count;
		group->active_count++;
		break;
	}

	thread_call_unlock(group);
}
1555 
/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own.  Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 *
 * May temporarily drop and re-take the group lock (around zfree and
 * the waiter wakeup); '*s' is updated accordingly.
 *
 * Returns true if a signal-style call was re-armed while running and
 * should be re-invoked by the caller.
 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
	thread_call_group_t call_group = thread_call_get_group(call);
	if (group != call_group) {
		panic("(%p %p) call finishing from wrong group: %p",
		    call, call->tc_func, call_group);
	}

	bool repend = false;
	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

	call->tc_finish_count++;

	if (!signal && alloc) {
		/* The thread call thread owns a ref until the call is finished */
		if (call->tc_refs <= 0) {
			panic("(%p %p) thread_call_finish: detected over-released thread call",
			    call, call->tc_func);
		}
		call->tc_refs--;
	}

	/* Snapshot the flags, then clear the per-invocation state bits. */
	thread_call_flags_t old_flags = call->tc_flags;
	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

	/*
	 * If the call was re-armed while it ran (and isn't about to be
	 * freed), put it back on the appropriate queue.
	 */
	if ((!alloc || call->tc_refs != 0) &&
	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
		assert(old_flags & THREAD_CALL_ONCE);
		thread_call_flavor_t flavor = thread_call_get_flavor(call);

		if (old_flags & THREAD_CALL_DELAYED) {
			uint64_t now = mach_absolute_time();
			if (flavor == TCF_CONTINUOUS) {
				now = absolutetime_to_continuoustime(now);
			}
			if (call->tc_soft_deadline <= now) {
				/* The deadline has already expired, go straight to pending */
				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
				call->tc_pqlink.deadline = 0;
			}
		}

		if (call->tc_pqlink.deadline) {
			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
			if (!signal) {
				_arm_delayed_call_timer(call, group, flavor);
			}
		} else if (signal) {
			/* Signal-style calls are re-invoked by the caller, not queued. */
			call->tc_submit_count++;
			repend = true;
		} else {
			_pending_call_enqueue(call, group, mach_absolute_time());
		}
	}

	if (!signal && alloc && call->tc_refs == 0) {
		/* Last reference dropped: validate invariants, then free. */
		if ((old_flags & THREAD_CALL_WAIT) != 0) {
			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
			    call, call->tc_func);
		}

		if (call->tc_finish_count != call->tc_submit_count) {
			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
			    call, call->tc_func,
			    call->tc_submit_count, call->tc_finish_count);
		}

		if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
			panic("(%p %p) uninitialized thread call", call, call->tc_func);
		}

		call->tc_flags &= ~THREAD_CALL_INITIALIZED;

		/* Drop the lock around the zone free. */
		enable_ints_and_unlock(group, *s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock(group);
	}

	if ((old_flags & THREAD_CALL_WAIT) != 0) {
		/*
		 * This may wake up a thread with a registered sched_call.
		 * That call might need the group lock, so we drop the lock
		 * to avoid deadlocking.
		 *
		 * We also must use a separate waitq from the idle waitq, as
		 * this path goes waitq lock->thread lock->group lock, but
		 * the idle wait goes group lock->waitq_lock->thread_lock.
		 */
		thread_call_unlock(group);

		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);

		thread_call_lock_spin(group);
		/* THREAD_CALL_SIGNAL call may have been freed */
	}

	return repend;
}
1665 
/*
 * thread_call_invoke
 *
 * Invoke the function provided for this thread call
 *
 * Note that the thread call object can be deallocated by the function if we do not control its storage.
 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	/* Capture tracing state up front: 'call' may be freed by func(). */
	uint64_t tc_ttd = call->tc_ttd;
	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

	/* The actual callout invocation. */
	(*func)(param0, param1);

#if CONFIG_DTRACE
	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
1705 
/*
 *	thread_call_thread:
 *
 *	Body of a thread call worker thread: drains its group's pending
 *	queue, invoking each callout, then either parks on the group's
 *	idle waitq or terminates.
 */
static void
thread_call_thread(
	thread_call_group_t             group,
	wait_result_t                   wres)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
	}

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	spl_t s = disable_ints_and_lock(group);

	/* Per-invocation bookkeeping lives on this thread's stack. */
	struct thread_call_thread_state thc_state = { .thc_group = group };
	self->thc_state = &thc_state;

	/* Register for block/unblock notifications from the scheduler. */
	thread_sched_call(self, sched_call_thread);

	while (group->pending_count > 0) {
		thread_call_t call = qe_dequeue_head(&group->pending_queue,
		    struct thread_call, tc_qlink);
		assert(call != NULL);

		/*
		 * This thread_call_get_group is also here to validate
		 * sanity of the thing popped off the queue
		 */
		thread_call_group_t call_group = thread_call_get_group(call);
		if (group != call_group) {
			panic("(%p %p) call on pending_queue from wrong group %p",
			    call, call->tc_func, call_group);
		}

		group->pending_count--;
		if (group->pending_count == 0) {
			assert(queue_empty(&group->pending_queue));
		}

		/* Snapshot the invocation parameters before releasing the call. */
		thread_call_func_t  func   = call->tc_func;
		thread_call_param_t param0 = call->tc_param0;
		thread_call_param_t param1 = call->tc_param1;

		if (func == NULL) {
			panic("pending call with NULL func: %p", call);
		}

		call->tc_queue = NULL;

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		bool needs_finish = false;
		if (call->tc_flags & THREAD_CALL_ALLOC) {
			call->tc_refs++;        /* Delay free until we're done */
		}
		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
			/*
			 * If THREAD_CALL_ONCE is used, and the timer wasn't
			 * THREAD_CALL_ALLOC, then clients swear they will use
			 * thread_call_cancel_wait() before destroying
			 * the thread call.
			 *
			 * Else, the storage for the thread call might have
			 * disappeared when thread_call_invoke() ran.
			 */
			needs_finish = true;
			call->tc_flags |= THREAD_CALL_RUNNING;
		}

		/* Publish what we are about to run for debugging/diagnostics. */
		thc_state.thc_call = call;
		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
		thc_state.thc_func = func;
		thc_state.thc_param0 = param0;
		thc_state.thc_param1 = param1;
		thc_state.thc_IOTES_invocation_timestamp = 0;

		/* Run the callout with interrupts enabled and the lock dropped. */
		enable_ints_and_unlock(group, s);

		thc_state.thc_call_start = mach_absolute_time();

		thread_call_invoke(func, param0, param1, call);

		thc_state.thc_call = NULL;

		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock(group);

		if (needs_finish) {
			/* Release refcount, may free, may temporarily drop lock */
			thread_call_finish(call, group, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	/* Attribute interrupt-context wakeups to this thread's ledger. */
	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle) {
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	self->thc_state = NULL;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait %d", wres);
		}

		enable_ints_and_unlock(group, s);

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(group, s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	/* Surplus thread: fall through and self-terminate. */
	enable_ints_and_unlock(group, s);

	thread_terminate(self);
	/* NOTREACHED */
}
1887 
1888 void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)1889 thread_call_start_iotes_invocation(__assert_only thread_call_t call)
1890 {
1891 	thread_t self = current_thread();
1892 
1893 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1894 		/* not a thread call thread, might be a workloop IOTES */
1895 		return;
1896 	}
1897 
1898 	assert(self->thc_state);
1899 	assert(self->thc_state->thc_call == call);
1900 
1901 	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
1902 }
1903 
1904 
1905 /*
1906  *	thread_call_daemon: walk list of groups, allocating
1907  *	threads if appropriate (as determined by
1908  *	thread_call_group_should_add_thread()).
1909  */
1910 static void
thread_call_daemon_continue(__unused void * arg,__unused wait_result_t w)1911 thread_call_daemon_continue(__unused void *arg,
1912     __unused wait_result_t w)
1913 {
1914 	do {
1915 		os_atomic_store(&thread_call_daemon_awake, false, relaxed);
1916 
1917 		for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
1918 			thread_call_group_t group = &thread_call_groups[i];
1919 
1920 			spl_t s = disable_ints_and_lock(group);
1921 
1922 			while (thread_call_group_should_add_thread(group)) {
1923 				group->active_count++;
1924 
1925 				enable_ints_and_unlock(group, s);
1926 
1927 				thread_call_thread_create(group);
1928 
1929 				s = disable_ints_and_lock(group);
1930 			}
1931 
1932 			enable_ints_and_unlock(group, s);
1933 		}
1934 	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));
1935 
1936 	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);
1937 
1938 	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
1939 		clear_wait(current_thread(), THREAD_AWAKENED);
1940 	}
1941 
1942 	thread_block_parameter(thread_call_daemon_continue, NULL);
1943 	/* NOTREACHED */
1944 }
1945 
1946 static void
thread_call_daemon(__unused void * arg,__unused wait_result_t w)1947 thread_call_daemon(
1948 	__unused void    *arg,
1949 	__unused wait_result_t w)
1950 {
1951 	thread_t        self = current_thread();
1952 
1953 	self->options |= TH_OPT_VMPRIV;
1954 	vm_page_free_reserve(2);        /* XXX */
1955 
1956 	thread_set_thread_name(self, "thread_call_daemon");
1957 
1958 	thread_call_daemon_continue(NULL, 0);
1959 	/* NOTREACHED */
1960 }
1961 
1962 /*
1963  * Schedule timer to deallocate a worker thread if we have a surplus
1964  * of threads (in excess of the group's target) and at least one thread
1965  * is idle the whole time.
1966  */
1967 static void
thread_call_start_deallocate_timer(thread_call_group_t group)1968 thread_call_start_deallocate_timer(thread_call_group_t group)
1969 {
1970 	__assert_only bool already_enqueued;
1971 
1972 	assert(group->idle_count > 0);
1973 	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);
1974 
1975 	group->tcg_flags |= TCG_DEALLOC_ACTIVE;
1976 
1977 	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1978 
1979 	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);
1980 
1981 	assert(already_enqueued == false);
1982 }
1983 
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t  group  = (thread_call_group_t)  p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	/* p0 must point at one of the statically allocated groups */
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);

	thread_call_t   call;
	uint64_t        now;

	thread_call_lock_spin(group);

	/* sample 'now' on the clock matching this queue's flavor */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

	/* drain expired calls in deadline order off the flavor's priority queue */
	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * if we hit a call that isn't yet ready to expire,
		 * then we're done for now
		 * TODO: The next timer in the list could have a larger leeway
		 *       and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 *       Have a separate rate-limited queue to avoid this
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			/* SIGNAL calls are invoked right here rather than handed
			 * off to a worker thread */
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				/* capture func/params before dropping the lock */
				thread_call_func_t  func   = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				/* the callout runs without the group lock held */
				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			/* hand the expired call to a worker via the pending queue */
			_pending_call_enqueue(call, group, now);
		}
	}

	/* re-arm for the first not-yet-expired call (NULL if queue drained) */
	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}
2063 
/*
 * Re-evaluate all delayed calls of one flavor in a group during a
 * forced timer evaluation (e.g. a latency quality-of-service change):
 * expired calls are promoted to pending, and armed calls with large
 * coalescing slop are re-sorted on their soft deadline.
 */
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	/* only valid in the forced-evaluation path */
	assert(ml_timer_forced_evaluation() == TRUE);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	/* safe variant: entries may be dequeued while we iterate */
	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			/* already expired: make it pending immediately */
			_pending_call_enqueue(call, group, now);
		} else {
			/* skew = coalescing slop between hard and soft deadline */
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal soft-deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
			if (timer_resort_threshold(skew)) {
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	/* re-arm the hardware timer for the (possibly new) earliest deadline */
	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}
2114 
2115 void
thread_call_delayed_timer_rescan_all(void)2116 thread_call_delayed_timer_rescan_all(void)
2117 {
2118 	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2119 		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2120 			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2121 		}
2122 	}
2123 }
2124 
2125 /*
2126  * Timer callback to tell a thread to terminate if
2127  * we have an excess of threads and at least one has been
2128  * idle for a long time.
2129  */
2130 static void
thread_call_dealloc_timer(timer_call_param_t p0,__unused timer_call_param_t p1)2131 thread_call_dealloc_timer(
2132 	timer_call_param_t              p0,
2133 	__unused timer_call_param_t     p1)
2134 {
2135 	thread_call_group_t group = (thread_call_group_t)p0;
2136 	uint64_t now;
2137 	kern_return_t res;
2138 	bool terminated = false;
2139 
2140 	thread_call_lock_spin(group);
2141 
2142 	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);
2143 
2144 	now = mach_absolute_time();
2145 
2146 	if (group->idle_count > 0) {
2147 		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
2148 			terminated = true;
2149 			group->idle_count--;
2150 			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
2151 			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT);
2152 			if (res != KERN_SUCCESS) {
2153 				panic("Unable to wake up idle thread for termination (%d)", res);
2154 			}
2155 		}
2156 	}
2157 
2158 	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
2159 
2160 	/*
2161 	 * If we still have an excess of threads, schedule another
2162 	 * invocation of this function.
2163 	 */
2164 	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
2165 		/*
2166 		 * If we killed someone just now, push out the
2167 		 * next deadline.
2168 		 */
2169 		if (terminated) {
2170 			group->idle_timestamp = now;
2171 		}
2172 
2173 		thread_call_start_deallocate_timer(group);
2174 	}
2175 
2176 	thread_call_unlock(group);
2177 }
2178 
2179 /*
2180  * Wait for the invocation of the thread call to complete
2181  * We know there's only one in flight because of the 'once' flag.
2182  *
2183  * If a subsequent invocation comes in before we wake up, that's OK
2184  *
2185  * TODO: Here is where we will add priority inheritance to the thread executing
2186  * the thread call in case it's lower priority than the current thread
2187  *      <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2188  *
2189  * Takes the thread call lock locked, returns unlocked
2190  *      This lets us avoid a spurious take/drop after waking up from thread_block
2191  *
2192  * This thread could be a thread call thread itself, blocking and therefore making a
2193  * sched_call upcall into the thread call subsystem, needing the group lock.
2194  * However, we're saved from deadlock because the 'block' upcall is made in
2195  * thread_block, not in assert_wait.
2196  */
2197 static bool
thread_call_wait_once_locked(thread_call_t call,spl_t s)2198 thread_call_wait_once_locked(thread_call_t call, spl_t s)
2199 {
2200 	assert(call->tc_flags & THREAD_CALL_ALLOC);
2201 	assert(call->tc_flags & THREAD_CALL_ONCE);
2202 
2203 	thread_call_group_t group = thread_call_get_group(call);
2204 
2205 	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
2206 		enable_ints_and_unlock(group, s);
2207 		return false;
2208 	}
2209 
2210 	/* call is running, so we have to wait for it */
2211 	call->tc_flags |= THREAD_CALL_WAIT;
2212 
2213 	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
2214 	if (res != THREAD_WAITING) {
2215 		panic("Unable to assert wait: %d", res);
2216 	}
2217 
2218 	enable_ints_and_unlock(group, s);
2219 
2220 	res = thread_block(THREAD_CONTINUE_NULL);
2221 	if (res != THREAD_AWAKENED) {
2222 		panic("Awoken with %d?", res);
2223 	}
2224 
2225 	/* returns unlocked */
2226 	return true;
2227 }
2228 
2229 /*
2230  * Wait for an in-flight invocation to complete
2231  * Does NOT try to cancel, so the client doesn't need to hold their
2232  * lock while calling this function.
2233  *
2234  * Returns whether or not it had to wait.
2235  *
2236  * Only works for THREAD_CALL_ONCE calls.
2237  */
2238 boolean_t
thread_call_wait_once(thread_call_t call)2239 thread_call_wait_once(thread_call_t call)
2240 {
2241 	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2242 		panic("(%p %p) thread_call_wait_once: can't wait on thread call whose storage I don't own",
2243 		    call, call->tc_func);
2244 	}
2245 
2246 	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2247 		panic("(%p %p) thread_call_wait_once: can't wait_once on a non-once call",
2248 		    call, call->tc_func);
2249 	}
2250 
2251 	if (!ml_get_interrupts_enabled()) {
2252 		panic("(%p %p) unsafe thread_call_wait_once",
2253 		    call, call->tc_func);
2254 	}
2255 
2256 	thread_t self = current_thread();
2257 
2258 	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2259 	    self->thc_state && self->thc_state->thc_call == call) {
2260 		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2261 		    call, call->tc_func);
2262 	}
2263 
2264 	thread_call_group_t group = thread_call_get_group(call);
2265 
2266 	spl_t s = disable_ints_and_lock(group);
2267 
2268 	bool waited = thread_call_wait_once_locked(call, s);
2269 	/* thread call lock unlocked */
2270 
2271 	return waited;
2272 }
2273 
2274 
2275 /*
2276  * Wait for all requested invocations of a thread call prior to now
2277  * to finish.  Can only be invoked on thread calls whose storage we manage.
2278  * Just waits for the finish count to catch up to the submit count we find
2279  * at the beginning of our wait.
2280  *
2281  * Called with thread_call_lock held.  Returns with lock released.
2282  */
2283 static void
thread_call_wait_locked(thread_call_t call,spl_t s)2284 thread_call_wait_locked(thread_call_t call, spl_t s)
2285 {
2286 	thread_call_group_t group = thread_call_get_group(call);
2287 
2288 	assert(call->tc_flags & THREAD_CALL_ALLOC);
2289 
2290 	uint64_t submit_count = call->tc_submit_count;
2291 
2292 	while (call->tc_finish_count < submit_count) {
2293 		call->tc_flags |= THREAD_CALL_WAIT;
2294 
2295 		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
2296 		    CAST_EVENT64_T(call), THREAD_UNINT, 0);
2297 
2298 		if (res != THREAD_WAITING) {
2299 			panic("Unable to assert wait: %d", res);
2300 		}
2301 
2302 		enable_ints_and_unlock(group, s);
2303 
2304 		res = thread_block(THREAD_CONTINUE_NULL);
2305 		if (res != THREAD_AWAKENED) {
2306 			panic("Awoken with %d?", res);
2307 		}
2308 
2309 		s = disable_ints_and_lock(group);
2310 	}
2311 
2312 	enable_ints_and_unlock(group, s);
2313 }
2314 
2315 /*
2316  * Determine whether a thread call is either on a queue or
2317  * currently being executed.
2318  */
2319 boolean_t
thread_call_isactive(thread_call_t call)2320 thread_call_isactive(thread_call_t call)
2321 {
2322 	thread_call_group_t group = thread_call_get_group(call);
2323 
2324 	spl_t s = disable_ints_and_lock(group);
2325 	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2326 	enable_ints_and_unlock(group, s);
2327 
2328 	return active;
2329 }
2330 
2331 /*
2332  * adjust_cont_time_thread_calls
2333  * on wake, reenqueue delayed call timer for continuous time thread call groups
2334  */
2335 void
adjust_cont_time_thread_calls(void)2336 adjust_cont_time_thread_calls(void)
2337 {
2338 	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2339 		thread_call_group_t group = &thread_call_groups[i];
2340 		spl_t s = disable_ints_and_lock(group);
2341 
2342 		/* only the continuous timers need to be re-armed */
2343 
2344 		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2345 		enable_ints_and_unlock(group, s);
2346 	}
2347 }
2348