1 /*
2 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41
42 #include <vm/vm_pageout.h>
43
44 #include <kern/thread_call.h>
45 #include <kern/timer_call.h>
46
47 #include <libkern/OSAtomic.h>
48 #include <kern/timer_queue.h>
49
50 #include <sys/kdebug.h>
51 #if CONFIG_DTRACE
52 #include <mach/sdt.h>
53 #endif
54 #include <machine/machine_routines.h>
55
56 static KALLOC_TYPE_DEFINE(thread_call_zone, thread_call_data_t, KT_PRIV_ACCT);
57
58 typedef enum {
59 TCF_ABSOLUTE = 0,
60 TCF_CONTINUOUS = 1,
61 TCF_COUNT = 2,
62 } thread_call_flavor_t;
63
64 __options_decl(thread_call_group_flags_t, uint32_t, {
65 TCG_NONE = 0x0,
66 TCG_PARALLEL = 0x1,
67 TCG_DEALLOC_ACTIVE = 0x2,
68 });
69
70 static struct thread_call_group {
71 __attribute__((aligned(128))) lck_ticket_t tcg_lock;
72
73 const char * tcg_name;
74
75 queue_head_t pending_queue;
76 uint32_t pending_count;
77
78 queue_head_t delayed_queues[TCF_COUNT];
79 struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
80 timer_call_data_t delayed_timers[TCF_COUNT];
81
82 timer_call_data_t dealloc_timer;
83
84 struct waitq idle_waitq;
85 uint64_t idle_timestamp;
86 uint32_t idle_count, active_count, blocked_count;
87
88 uint32_t tcg_thread_pri;
89 uint32_t target_thread_count;
90
91 thread_call_group_flags_t tcg_flags;
92
93 struct waitq waiters_waitq;
94 } thread_call_groups[THREAD_CALL_INDEX_MAX] = {
95 [THREAD_CALL_INDEX_INVALID] = {
96 .tcg_name = "invalid",
97 },
98 [THREAD_CALL_INDEX_HIGH] = {
99 .tcg_name = "high",
100 .tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
101 .target_thread_count = 4,
102 .tcg_flags = TCG_NONE,
103 },
104 [THREAD_CALL_INDEX_KERNEL] = {
105 .tcg_name = "kernel",
106 .tcg_thread_pri = BASEPRI_KERNEL,
107 .target_thread_count = 1,
108 .tcg_flags = TCG_PARALLEL,
109 },
110 [THREAD_CALL_INDEX_USER] = {
111 .tcg_name = "user",
112 .tcg_thread_pri = BASEPRI_DEFAULT,
113 .target_thread_count = 1,
114 .tcg_flags = TCG_PARALLEL,
115 },
116 [THREAD_CALL_INDEX_LOW] = {
117 .tcg_name = "low",
118 .tcg_thread_pri = MAXPRI_THROTTLE,
119 .target_thread_count = 1,
120 .tcg_flags = TCG_PARALLEL,
121 },
122 [THREAD_CALL_INDEX_KERNEL_HIGH] = {
123 .tcg_name = "kernel-high",
124 .tcg_thread_pri = BASEPRI_PREEMPT,
125 .target_thread_count = 2,
126 .tcg_flags = TCG_NONE,
127 },
128 [THREAD_CALL_INDEX_QOS_UI] = {
129 .tcg_name = "qos-ui",
130 .tcg_thread_pri = BASEPRI_FOREGROUND,
131 .target_thread_count = 1,
132 .tcg_flags = TCG_NONE,
133 },
134 [THREAD_CALL_INDEX_QOS_IN] = {
135 .tcg_name = "qos-in",
136 .tcg_thread_pri = BASEPRI_USER_INITIATED,
137 .target_thread_count = 1,
138 .tcg_flags = TCG_NONE,
139 },
140 [THREAD_CALL_INDEX_QOS_UT] = {
141 .tcg_name = "qos-ut",
142 .tcg_thread_pri = BASEPRI_UTILITY,
143 .target_thread_count = 1,
144 .tcg_flags = TCG_NONE,
145 },
146 };
147
148 typedef struct thread_call_group *thread_call_group_t;
149
150 #define INTERNAL_CALL_COUNT 768
151 #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
152 #define THREAD_CALL_ADD_RATIO 4
153 #define THREAD_CALL_MACH_FACTOR_CAP 3
154 #define THREAD_CALL_GROUP_MAX_THREADS 500
155
156 struct thread_call_thread_state {
157 struct thread_call_group * thc_group;
158 struct thread_call * thc_call; /* debug only, may be deallocated */
159 uint64_t thc_call_start;
160 uint64_t thc_call_soft_deadline;
161 uint64_t thc_call_hard_deadline;
162 uint64_t thc_call_pending_timestamp;
163 uint64_t thc_IOTES_invocation_timestamp;
164 thread_call_func_t thc_func;
165 thread_call_param_t thc_param0;
166 thread_call_param_t thc_param1;
167 };
168
169 static bool thread_call_daemon_awake = true;
170 /*
171 * This special waitq exists because the daemon thread
172 * might need to be woken while already holding a global waitq locked.
173 */
174 static struct waitq daemon_waitq;
175
176 static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
177 static queue_head_t thread_call_internal_queue;
178 int thread_call_internal_queue_count = 0;
179 static uint64_t thread_call_dealloc_interval_abs;
180
181 static void _internal_call_init(void);
182
183 static thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
184 static bool _is_internal_call(thread_call_t call);
185 static void _internal_call_release(thread_call_t call);
186 static bool _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
187 static bool _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
188 uint64_t deadline, thread_call_flavor_t flavor);
189 static bool _call_dequeue(thread_call_t call, thread_call_group_t group);
190 static void thread_call_wake(thread_call_group_t group);
191 static void thread_call_daemon(void *arg, wait_result_t w);
192 static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
193 static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
194 static void thread_call_group_setup(thread_call_group_t group);
195 static void sched_call_thread(int type, thread_t thread);
196 static void thread_call_start_deallocate_timer(thread_call_group_t group);
197 static void thread_call_wait_locked(thread_call_t call, spl_t s);
198 static bool thread_call_wait_once_locked(thread_call_t call, spl_t s);
199
200 static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
201 thread_call_func_t alt_func, thread_call_param_t alt_param0,
202 thread_call_param_t param1, uint64_t deadline,
203 uint64_t leeway, unsigned int flags);
204
205 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
206 extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
207
208 LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
209
210
211 static void
thread_call_lock_spin(thread_call_group_t group)212 thread_call_lock_spin(thread_call_group_t group)
213 {
214 lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
215 }
216
217 static void
thread_call_unlock(thread_call_group_t group)218 thread_call_unlock(thread_call_group_t group)
219 {
220 lck_ticket_unlock(&group->tcg_lock);
221 }
222
223 static void __assert_only
thread_call_assert_locked(thread_call_group_t group)224 thread_call_assert_locked(thread_call_group_t group)
225 {
226 lck_ticket_assert_owned(&group->tcg_lock);
227 }
228
229
230 static spl_t
disable_ints_and_lock(thread_call_group_t group)231 disable_ints_and_lock(thread_call_group_t group)
232 {
233 spl_t s = splsched();
234 thread_call_lock_spin(group);
235
236 return s;
237 }
238
239 static void
enable_ints_and_unlock(thread_call_group_t group,spl_t s)240 enable_ints_and_unlock(thread_call_group_t group, spl_t s)
241 {
242 thread_call_unlock(group);
243 splx(s);
244 }
245
246 static thread_call_group_t
thread_call_get_group(thread_call_t call)247 thread_call_get_group(thread_call_t call)
248 {
249 thread_call_index_t index = call->tc_index;
250 thread_call_flags_t flags = call->tc_flags;
251 thread_call_func_t func = call->tc_func;
252
253 if (index == THREAD_CALL_INDEX_INVALID || index >= THREAD_CALL_INDEX_MAX) {
254 panic("(%p %p) invalid thread call index: %d", call, func, index);
255 }
256
257 if (func == NULL || !(flags & THREAD_CALL_INITIALIZED)) {
258 panic("(%p %p) uninitialized thread call", call, func);
259 }
260
261 if (flags & THREAD_CALL_ALLOC) {
262 kalloc_type_require(thread_call_data_t, call);
263 }
264
265 return &thread_call_groups[index];
266 }
267
268 /* Lock held */
269 static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)270 thread_call_get_flavor(thread_call_t call)
271 {
272 return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
273 }
274
275 /* Lock held */
276 static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call,thread_call_flavor_t flavor)277 thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
278 {
279 assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
280 thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
281
282 if (old_flavor != flavor) {
283 if (flavor == TCF_CONTINUOUS) {
284 call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
285 } else {
286 call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
287 }
288 }
289
290 return old_flavor;
291 }
292
293 /* returns true if it was on a queue */
294 static bool
thread_call_enqueue_tail(thread_call_t call,queue_t new_queue)295 thread_call_enqueue_tail(
296 thread_call_t call,
297 queue_t new_queue)
298 {
299 queue_t old_queue = call->tc_queue;
300
301 thread_call_group_t group = thread_call_get_group(call);
302 thread_call_flavor_t flavor = thread_call_get_flavor(call);
303
304 if (old_queue != NULL &&
305 old_queue != &group->delayed_queues[flavor]) {
306 panic("thread call (%p %p) on bad queue (old_queue: %p)",
307 call, call->tc_func, old_queue);
308 }
309
310 if (old_queue == &group->delayed_queues[flavor]) {
311 priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
312 }
313
314 if (old_queue == NULL) {
315 enqueue_tail(new_queue, &call->tc_qlink);
316 } else {
317 re_queue_tail(new_queue, &call->tc_qlink);
318 }
319
320 call->tc_queue = new_queue;
321
322 return old_queue != NULL;
323 }
324
325 static queue_head_t *
thread_call_dequeue(thread_call_t call)326 thread_call_dequeue(
327 thread_call_t call)
328 {
329 queue_t old_queue = call->tc_queue;
330
331 thread_call_group_t group = thread_call_get_group(call);
332 thread_call_flavor_t flavor = thread_call_get_flavor(call);
333
334 if (old_queue != NULL &&
335 old_queue != &group->pending_queue &&
336 old_queue != &group->delayed_queues[flavor]) {
337 panic("thread call (%p %p) on bad queue (old_queue: %p)",
338 call, call->tc_func, old_queue);
339 }
340
341 if (old_queue == &group->delayed_queues[flavor]) {
342 priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
343 }
344
345 if (old_queue != NULL) {
346 remqueue(&call->tc_qlink);
347
348 call->tc_queue = NULL;
349 }
350 return old_queue;
351 }
352
353 static queue_head_t *
thread_call_enqueue_deadline(thread_call_t call,thread_call_group_t group,thread_call_flavor_t flavor,uint64_t deadline)354 thread_call_enqueue_deadline(
355 thread_call_t call,
356 thread_call_group_t group,
357 thread_call_flavor_t flavor,
358 uint64_t deadline)
359 {
360 queue_t old_queue = call->tc_queue;
361 queue_t new_queue = &group->delayed_queues[flavor];
362
363 thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);
364
365 if (old_queue != NULL &&
366 old_queue != &group->pending_queue &&
367 old_queue != &group->delayed_queues[old_flavor]) {
368 panic("thread call (%p %p) on bad queue (old_queue: %p)",
369 call, call->tc_func, old_queue);
370 }
371
372 if (old_queue == new_queue) {
373 /* optimize the same-queue case to avoid a full re-insert */
374 uint64_t old_deadline = call->tc_pqlink.deadline;
375 call->tc_pqlink.deadline = deadline;
376
377 if (old_deadline < deadline) {
378 priority_queue_entry_increased(&group->delayed_pqueues[flavor],
379 &call->tc_pqlink);
380 } else {
381 priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
382 &call->tc_pqlink);
383 }
384 } else {
385 if (old_queue == &group->delayed_queues[old_flavor]) {
386 priority_queue_remove(&group->delayed_pqueues[old_flavor],
387 &call->tc_pqlink);
388 }
389
390 call->tc_pqlink.deadline = deadline;
391
392 priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
393 }
394
395 if (old_queue == NULL) {
396 enqueue_tail(new_queue, &call->tc_qlink);
397 } else if (old_queue != new_queue) {
398 re_queue_tail(new_queue, &call->tc_qlink);
399 }
400
401 call->tc_queue = new_queue;
402
403 return old_queue;
404 }
405
406 uint64_t
thread_call_get_armed_deadline(thread_call_t call)407 thread_call_get_armed_deadline(thread_call_t call)
408 {
409 return call->tc_pqlink.deadline;
410 }
411
412
413 static bool
group_isparallel(thread_call_group_t group)414 group_isparallel(thread_call_group_t group)
415 {
416 return (group->tcg_flags & TCG_PARALLEL) != 0;
417 }
418
419 static bool
thread_call_group_should_add_thread(thread_call_group_t group)420 thread_call_group_should_add_thread(thread_call_group_t group)
421 {
422 if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
423 panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
424 group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
425 group->active_count, group->blocked_count, group->idle_count);
426 }
427
428 if (group_isparallel(group) == false) {
429 if (group->pending_count > 0 && group->active_count == 0) {
430 return true;
431 }
432
433 return false;
434 }
435
436 if (group->pending_count > 0) {
437 if (group->idle_count > 0) {
438 return false;
439 }
440
441 uint32_t thread_count = group->active_count;
442
443 /*
444 * Add a thread if either there are no threads,
445 * the group has fewer than its target number of
446 * threads, or the amount of work is large relative
447 * to the number of threads. In the last case, pay attention
448 * to the total load on the system, and back off if
449 * it's high.
450 */
451 if ((thread_count == 0) ||
452 (thread_count < group->target_thread_count) ||
453 ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
454 (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
455 return true;
456 }
457 }
458
459 return false;
460 }
461
462 static void
thread_call_group_setup(thread_call_group_t group)463 thread_call_group_setup(thread_call_group_t group)
464 {
465 lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);
466
467 queue_init(&group->pending_queue);
468
469 for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
470 queue_init(&group->delayed_queues[flavor]);
471 priority_queue_init(&group->delayed_pqueues[flavor]);
472 timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
473 }
474
475 timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
476
477 waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
478
479 /* Reverse the wait order so we re-use the most recently parked thread from the pool */
480 waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
481 }
482
483 /*
484 * Simple wrapper for creating threads bound to
485 * thread call groups.
486 */
487 static void
thread_call_thread_create(thread_call_group_t group)488 thread_call_thread_create(
489 thread_call_group_t group)
490 {
491 thread_t thread;
492 kern_return_t result;
493
494 int thread_pri = group->tcg_thread_pri;
495
496 result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
497 group, thread_pri, &thread);
498 if (result != KERN_SUCCESS) {
499 panic("cannot create new thread call thread %d", result);
500 }
501
502 if (thread_pri <= BASEPRI_KERNEL) {
503 /*
504 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
505 * in kernel if there are higher priority threads available.
506 */
507 thread_set_eager_preempt(thread);
508 }
509
510 char name[MAXTHREADNAMESIZE] = "";
511
512 int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
513
514 snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
515 thread_set_thread_name(thread, name);
516
517 thread_deallocate(thread);
518 }
519
520 /*
521 * thread_call_initialize:
522 *
523 * Initialize this module, called
524 * early during system initialization.
525 */
526 __startup_func
527 static void
thread_call_initialize(void)528 thread_call_initialize(void)
529 {
530 nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
531 waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);
532
533 for (uint32_t i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
534 thread_call_group_setup(&thread_call_groups[i]);
535 }
536
537 _internal_call_init();
538
539 thread_t thread;
540 kern_return_t result;
541
542 result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
543 NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
544 if (result != KERN_SUCCESS) {
545 panic("thread_call_initialize failed (%d)", result);
546 }
547
548 thread_deallocate(thread);
549 }
550 STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);
551
552 void
thread_call_setup_with_options(thread_call_t call,thread_call_func_t func,thread_call_param_t param0,thread_call_priority_t pri,thread_call_options_t options)553 thread_call_setup_with_options(
554 thread_call_t call,
555 thread_call_func_t func,
556 thread_call_param_t param0,
557 thread_call_priority_t pri,
558 thread_call_options_t options)
559 {
560 if (func == NULL) {
561 panic("initializing thread call with NULL func");
562 }
563
564 bzero(call, sizeof(*call));
565
566 *call = (struct thread_call) {
567 .tc_func = func,
568 .tc_param0 = param0,
569 .tc_flags = THREAD_CALL_INITIALIZED,
570 };
571
572 switch (pri) {
573 case THREAD_CALL_PRIORITY_HIGH:
574 call->tc_index = THREAD_CALL_INDEX_HIGH;
575 break;
576 case THREAD_CALL_PRIORITY_KERNEL:
577 call->tc_index = THREAD_CALL_INDEX_KERNEL;
578 break;
579 case THREAD_CALL_PRIORITY_USER:
580 call->tc_index = THREAD_CALL_INDEX_USER;
581 break;
582 case THREAD_CALL_PRIORITY_LOW:
583 call->tc_index = THREAD_CALL_INDEX_LOW;
584 break;
585 case THREAD_CALL_PRIORITY_KERNEL_HIGH:
586 call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
587 break;
588 default:
589 panic("Invalid thread call pri value: %d", pri);
590 break;
591 }
592
593 if (options & THREAD_CALL_OPTIONS_ONCE) {
594 call->tc_flags |= THREAD_CALL_ONCE;
595 }
596 if (options & THREAD_CALL_OPTIONS_SIGNAL) {
597 call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
598 }
599 }
600
601 void
thread_call_setup(thread_call_t call,thread_call_func_t func,thread_call_param_t param0)602 thread_call_setup(
603 thread_call_t call,
604 thread_call_func_t func,
605 thread_call_param_t param0)
606 {
607 thread_call_setup_with_options(call, func, param0,
608 THREAD_CALL_PRIORITY_HIGH, 0);
609 }
610
611 static void
_internal_call_init(void)612 _internal_call_init(void)
613 {
614 /* Function-only thread calls are only kept in the default HIGH group */
615 thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
616
617 spl_t s = disable_ints_and_lock(group);
618
619 queue_init(&thread_call_internal_queue);
620
621 for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
622 enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
623 thread_call_internal_queue_count++;
624 }
625
626 enable_ints_and_unlock(group, s);
627 }
628
629 /*
630 * _internal_call_allocate:
631 *
632 * Allocate an internal callout entry.
633 *
634 * Called with thread_call_lock held.
635 */
636 static thread_call_t
_internal_call_allocate(thread_call_func_t func,thread_call_param_t param0)637 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
638 {
639 /* Function-only thread calls are only kept in the default HIGH group */
640 thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
641
642 spl_t s = disable_ints_and_lock(group);
643
644 thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
645 struct thread_call, tc_qlink);
646
647 if (call == NULL) {
648 panic("_internal_call_allocate: thread_call_internal_queue empty");
649 }
650
651 thread_call_internal_queue_count--;
652
653 thread_call_setup(call, func, param0);
654 /* THREAD_CALL_ALLOC not set, do not free back to zone */
655 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
656 enable_ints_and_unlock(group, s);
657
658 return call;
659 }
660
661 /* Check if a call is internal and needs to be returned to the internal pool. */
662 static bool
_is_internal_call(thread_call_t call)663 _is_internal_call(thread_call_t call)
664 {
665 if (call >= internal_call_storage &&
666 call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
667 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
668 return true;
669 }
670 return false;
671 }
672
673 /*
674 * _internal_call_release:
675 *
676 * Release an internal callout entry which
677 * is no longer pending (or delayed).
678 *
679 * Called with thread_call_lock held.
680 */
681 static void
_internal_call_release(thread_call_t call)682 _internal_call_release(thread_call_t call)
683 {
684 assert(_is_internal_call(call));
685
686 thread_call_group_t group = thread_call_get_group(call);
687
688 assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
689 thread_call_assert_locked(group);
690
691 call->tc_flags &= ~THREAD_CALL_INITIALIZED;
692
693 enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
694 thread_call_internal_queue_count++;
695 }
696
697 /*
698 * _pending_call_enqueue:
699 *
700 * Place an entry at the end of the
701 * pending queue, to be executed soon.
702 *
703 * Returns TRUE if the entry was already
704 * on a queue.
705 *
706 * Called with thread_call_lock held.
707 */
708 static bool
_pending_call_enqueue(thread_call_t call,thread_call_group_t group,uint64_t now)709 _pending_call_enqueue(thread_call_t call,
710 thread_call_group_t group,
711 uint64_t now)
712 {
713 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
714 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
715 call->tc_pqlink.deadline = 0;
716
717 thread_call_flags_t flags = call->tc_flags;
718 call->tc_flags |= THREAD_CALL_RESCHEDULE;
719
720 assert(call->tc_queue == NULL);
721
722 return flags & THREAD_CALL_RESCHEDULE;
723 }
724
725 call->tc_pending_timestamp = now;
726
727 bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);
728
729 if (!was_on_queue) {
730 call->tc_submit_count++;
731 }
732
733 group->pending_count++;
734
735 thread_call_wake(group);
736
737 return was_on_queue;
738 }
739
740 /*
741 * _delayed_call_enqueue:
742 *
743 * Place an entry on the delayed queue,
744 * after existing entries with an earlier
745 * (or identical) deadline.
746 *
747 * Returns TRUE if the entry was already
748 * on a queue.
749 *
750 * Called with thread_call_lock held.
751 */
752 static bool
_delayed_call_enqueue(thread_call_t call,thread_call_group_t group,uint64_t deadline,thread_call_flavor_t flavor)753 _delayed_call_enqueue(
754 thread_call_t call,
755 thread_call_group_t group,
756 uint64_t deadline,
757 thread_call_flavor_t flavor)
758 {
759 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
760 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
761 call->tc_pqlink.deadline = deadline;
762
763 thread_call_flags_t flags = call->tc_flags;
764 call->tc_flags |= THREAD_CALL_RESCHEDULE;
765
766 assert(call->tc_queue == NULL);
767 thread_call_set_flavor(call, flavor);
768
769 return flags & THREAD_CALL_RESCHEDULE;
770 }
771
772 queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);
773
774 if (old_queue == &group->pending_queue) {
775 group->pending_count--;
776 } else if (old_queue == NULL) {
777 call->tc_submit_count++;
778 }
779
780 return old_queue != NULL;
781 }
782
783 /*
784 * _call_dequeue:
785 *
786 * Remove an entry from a queue.
787 *
788 * Returns TRUE if the entry was on a queue.
789 *
790 * Called with thread_call_lock held.
791 */
792 static bool
_call_dequeue(thread_call_t call,thread_call_group_t group)793 _call_dequeue(
794 thread_call_t call,
795 thread_call_group_t group)
796 {
797 queue_head_t *old_queue = thread_call_dequeue(call);
798
799 if (old_queue == NULL) {
800 return false;
801 }
802
803 call->tc_finish_count++;
804
805 if (old_queue == &group->pending_queue) {
806 group->pending_count--;
807 }
808
809 return true;
810 }
811
812 /*
813 * _arm_delayed_call_timer:
814 *
815 * Check if the timer needs to be armed for this flavor,
816 * and if so, arm it.
817 *
818 * If call is non-NULL, only re-arm the timer if the specified call
819 * is the first in the queue.
820 *
821 * Returns true if the timer was armed/re-armed, false if it was left unset
822 * Caller should cancel the timer if need be.
823 *
824 * Called with thread_call_lock held.
825 */
826 static bool
_arm_delayed_call_timer(thread_call_t new_call,thread_call_group_t group,thread_call_flavor_t flavor)827 _arm_delayed_call_timer(thread_call_t new_call,
828 thread_call_group_t group,
829 thread_call_flavor_t flavor)
830 {
831 /* No calls implies no timer needed */
832 if (queue_empty(&group->delayed_queues[flavor])) {
833 return false;
834 }
835
836 thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);
837
838 /* We only need to change the hard timer if this new call is the first in the list */
839 if (new_call != NULL && new_call != call) {
840 return false;
841 }
842
843 assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));
844
845 uint64_t fire_at = call->tc_soft_deadline;
846
847 if (flavor == TCF_CONTINUOUS) {
848 assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
849 fire_at = continuoustime_to_absolutetime(fire_at);
850 } else {
851 assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
852 }
853
854 /*
855 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
856 * which does not take into account later-deadline timers with a larger leeway.
857 * This is a valid coalescing behavior, but masks a possible window to
858 * fire a timer instead of going idle.
859 */
860 uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;
861
862 timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
863 fire_at, leeway,
864 TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
865 ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
866
867 return true;
868 }
869
870 /*
871 * _cancel_func_from_queue:
872 *
873 * Remove the first (or all) matching
874 * entries from the specified queue.
875 *
876 * Returns TRUE if any matching entries
877 * were found.
878 *
879 * Called with thread_call_lock held.
880 */
881 static boolean_t
_cancel_func_from_queue(thread_call_func_t func,thread_call_param_t param0,thread_call_group_t group,boolean_t remove_all,queue_head_t * queue)882 _cancel_func_from_queue(thread_call_func_t func,
883 thread_call_param_t param0,
884 thread_call_group_t group,
885 boolean_t remove_all,
886 queue_head_t *queue)
887 {
888 boolean_t call_removed = FALSE;
889 thread_call_t call;
890
891 qe_foreach_element_safe(call, queue, tc_qlink) {
892 if (call->tc_func != func ||
893 call->tc_param0 != param0) {
894 continue;
895 }
896
897 _call_dequeue(call, group);
898
899 if (_is_internal_call(call)) {
900 _internal_call_release(call);
901 }
902
903 call_removed = TRUE;
904 if (!remove_all) {
905 break;
906 }
907 }
908
909 return call_removed;
910 }
911
912 /*
913 * thread_call_func_delayed:
914 *
915 * Enqueue a function callout to
916 * occur at the stated time.
917 */
918 void
thread_call_func_delayed(thread_call_func_t func,thread_call_param_t param,uint64_t deadline)919 thread_call_func_delayed(
920 thread_call_func_t func,
921 thread_call_param_t param,
922 uint64_t deadline)
923 {
924 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
925 }
926
927 /*
928 * thread_call_func_delayed_with_leeway:
929 *
930 * Same as thread_call_func_delayed(), but with
931 * leeway/flags threaded through.
932 */
933
934 void
thread_call_func_delayed_with_leeway(thread_call_func_t func,thread_call_param_t param,uint64_t deadline,uint64_t leeway,uint32_t flags)935 thread_call_func_delayed_with_leeway(
936 thread_call_func_t func,
937 thread_call_param_t param,
938 uint64_t deadline,
939 uint64_t leeway,
940 uint32_t flags)
941 {
942 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
943 }
944
945 /*
946 * thread_call_func_cancel:
947 *
948 * Dequeue a function callout.
949 *
950 * Removes one (or all) { function, argument }
951 * instance(s) from either (or both)
952 * the pending and the delayed queue,
953 * in that order.
954 *
955 * Returns TRUE if any calls were cancelled.
956 *
957 * This iterates all of the pending or delayed thread calls in the group,
958 * which is really inefficient. Switch to an allocated thread call instead.
959 *
960 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
961 * affect the main 'high' group.
962 */
963 boolean_t
thread_call_func_cancel(thread_call_func_t func,thread_call_param_t param,boolean_t cancel_all)964 thread_call_func_cancel(
965 thread_call_func_t func,
966 thread_call_param_t param,
967 boolean_t cancel_all)
968 {
969 boolean_t result;
970
971 if (func == NULL) {
972 panic("trying to cancel NULL func");
973 }
974
975 /* Function-only thread calls are only kept in the default HIGH group */
976 thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
977
978 spl_t s = disable_ints_and_lock(group);
979
980 if (cancel_all) {
981 /* exhaustively search every queue, and return true if any search found something */
982 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
983 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
984 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
985 } else {
986 /* early-exit as soon as we find something, don't search other queues */
987 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
988 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
989 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
990 }
991
992 enable_ints_and_unlock(group, s);
993
994 return result;
995 }
996
997 /*
998 * Allocate a thread call with a given priority. Importances other than
999 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
1000 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
1001 * threads which are not in the normal "urgent" bands).
1002 */
thread_call_t
thread_call_allocate_with_priority(
	thread_call_func_t func,
	thread_call_param_t param0,
	thread_call_priority_t pri)
{
	/* Convenience wrapper: same as allocate_with_options with no options set. */
	return thread_call_allocate_with_options(func, param0, pri, 0);
}
1011
/*
 * Allocate and initialize a thread call from the thread_call zone.
 *
 * The caller receives the initial reference.  THREAD_CALL_ALLOC marks
 * the storage as owned by the thread call subsystem, so it is returned
 * to the zone when the last reference is dropped (see thread_call_free).
 */
thread_call_t
thread_call_allocate_with_options(
	thread_call_func_t func,
	thread_call_param_t param0,
	thread_call_priority_t pri,
	thread_call_options_t options)
{
	thread_call_t call = zalloc(thread_call_zone);

	thread_call_setup_with_options(call, func, param0, pri, options);
	call->tc_refs = 1;                      /* caller holds the initial reference */
	call->tc_flags |= THREAD_CALL_ALLOC;    /* storage owned by the subsystem */

	return call;
}
1027
1028 thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,thread_call_param_t param0,int qos_tier,thread_call_options_t options)1029 thread_call_allocate_with_qos(thread_call_func_t func,
1030 thread_call_param_t param0,
1031 int qos_tier,
1032 thread_call_options_t options)
1033 {
1034 thread_call_t call = thread_call_allocate(func, param0);
1035
1036 switch (qos_tier) {
1037 case THREAD_QOS_UNSPECIFIED:
1038 call->tc_index = THREAD_CALL_INDEX_HIGH;
1039 break;
1040 case THREAD_QOS_LEGACY:
1041 call->tc_index = THREAD_CALL_INDEX_USER;
1042 break;
1043 case THREAD_QOS_MAINTENANCE:
1044 case THREAD_QOS_BACKGROUND:
1045 call->tc_index = THREAD_CALL_INDEX_LOW;
1046 break;
1047 case THREAD_QOS_UTILITY:
1048 call->tc_index = THREAD_CALL_INDEX_QOS_UT;
1049 break;
1050 case THREAD_QOS_USER_INITIATED:
1051 call->tc_index = THREAD_CALL_INDEX_QOS_IN;
1052 break;
1053 case THREAD_QOS_USER_INTERACTIVE:
1054 call->tc_index = THREAD_CALL_INDEX_QOS_UI;
1055 break;
1056 default:
1057 panic("Invalid thread call qos value: %d", qos_tier);
1058 break;
1059 }
1060
1061 if (options & THREAD_CALL_OPTIONS_ONCE) {
1062 call->tc_flags |= THREAD_CALL_ONCE;
1063 }
1064
1065 /* does not support THREAD_CALL_OPTIONS_SIGNAL */
1066
1067 return call;
1068 }
1069
1070
1071 /*
1072 * thread_call_allocate:
1073 *
1074 * Allocate a callout entry.
1075 */
thread_call_t
thread_call_allocate(
	thread_call_func_t func,
	thread_call_param_t param0)
{
	/* Default allocation: HIGH priority, no options. */
	return thread_call_allocate_with_options(func, param0,
	    THREAD_CALL_PRIORITY_HIGH, 0);
}
1084
1085 /*
1086 * thread_call_free:
1087 *
1088 * Release a callout. If the callout is currently
1089 * executing, it will be freed when all invocations
1090 * finish.
1091 *
1092 * If the callout is currently armed to fire again, then
1093 * freeing is not allowed and returns FALSE. The
1094 * client must have canceled the pending invocation before freeing.
1095 */
boolean_t
thread_call_free(
	thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * Refuse to free a call that is still enqueued, or that is marked
	 * for re-scheduling; the client must cancel it first.
	 */
	if (call->tc_queue != NULL ||
	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
		thread_call_unlock(group);
		splx(s);

		return FALSE;
	}

	/*
	 * Drop the caller's reference.  A worker thread may still hold a
	 * reference for an in-flight invocation (see thread_call_thread),
	 * in which case refs stays above zero and the final free happens
	 * in thread_call_finish.
	 */
	int32_t refs = --call->tc_refs;
	if (refs < 0) {
		panic("(%p %p) Refcount negative: %d", call, call->tc_func, refs);
	}

	/*
	 * A running SIGNAL-style call is invoked from timer context, so we
	 * must wait for the in-flight invocation to complete here.
	 */
	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
		thread_call_wait_once_locked(call, s);
		/* thread call lock has been unlocked */
	} else {
		enable_ints_and_unlock(group, s);
	}

	if (refs == 0) {
		/* Last reference: sanity-check state before returning storage to the zone. */
		if (!(call->tc_flags & THREAD_CALL_INITIALIZED)) {
			panic("(%p %p) freeing an uninitialized call", call, call->tc_func);
		}

		if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
			    call, call->tc_func);
		}

		if (call->tc_flags & THREAD_CALL_RUNNING) {
			panic("(%p %p) freeing a running once call", call, call->tc_func);
		}

		if (call->tc_finish_count != call->tc_submit_count) {
			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
			    call, call->tc_func,
			    call->tc_submit_count, call->tc_finish_count);
		}

		call->tc_flags &= ~THREAD_CALL_INITIALIZED;

		zfree(thread_call_zone, call);
	}

	return TRUE;
}
1152
1153 /*
1154 * thread_call_enter:
1155 *
1156 * Enqueue a callout entry to occur "soon".
1157 *
1158 * Returns TRUE if the call was
1159 * already on a queue.
1160 */
boolean_t
thread_call_enter(
	thread_call_t call)
{
	/* Equivalent to thread_call_enter1 with a NULL param1. */
	return thread_call_enter1(call, 0);
}
1167
boolean_t
thread_call_enter1(
	thread_call_t call,
	thread_call_param_t param1)
{
	if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
		panic("(%p %p) uninitialized thread call", call, call->tc_func);
	}

	/* SIGNAL-style calls are driven from timer context, not via this path. */
	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

	thread_call_group_t group = thread_call_get_group(call);
	bool result = true;

	spl_t s = disable_ints_and_lock(group);

	/* Only enqueue if not already pending; result reports prior queued state. */
	if (call->tc_queue != &group->pending_queue) {
		result = _pending_call_enqueue(call, group, mach_absolute_time());
	}

	/* param1 is updated even when the call was already on the pending queue. */
	call->tc_param1 = param1;

	enable_ints_and_unlock(group, s);

	return result;
}
1194
1195 /*
1196 * thread_call_enter_delayed:
1197 *
1198 * Enqueue a callout entry to occur
1199 * at the stated time.
1200 *
1201 * Returns TRUE if the call was
1202 * already on a queue.
1203 */
boolean_t
thread_call_enter_delayed(
	thread_call_t call,
	uint64_t deadline)
{
	if (call == NULL) {
		panic("NULL call in %s", __FUNCTION__);
	}
	/* No param1, no leeway, default urgency/flavor. */
	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}
1214
boolean_t
thread_call_enter1_delayed(
	thread_call_t call,
	thread_call_param_t param1,
	uint64_t deadline)
{
	if (call == NULL) {
		panic("NULL call in %s", __FUNCTION__);
	}

	/* Like thread_call_enter_delayed, but also sets param1. */
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}
1227
boolean_t
thread_call_enter_delayed_with_leeway(
	thread_call_t call,
	thread_call_param_t param1,
	uint64_t deadline,
	uint64_t leeway,
	unsigned int flags)
{
	if (call == NULL) {
		panic("NULL call in %s", __FUNCTION__);
	}

	/* Full-featured variant: caller supplies leeway and urgency/flavor flags. */
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}
1242
1243
1244 /*
1245 * thread_call_enter_delayed_internal:
1246 * enqueue a callout entry to occur at the stated time
1247 *
1248 * Returns True if the call was already on a queue
1249 * params:
1250 * call - structure encapsulating state of the callout
1251 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
1252 * deadline - time deadline in nanoseconds
1253 * leeway - timer slack represented as delta of deadline.
1254 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1255 * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1256 * THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1257 * than mach_absolute_time
1258 */
boolean_t
thread_call_enter_delayed_internal(
	thread_call_t call,
	thread_call_func_t alt_func,
	thread_call_param_t alt_param0,
	thread_call_param_t param1,
	uint64_t deadline,
	uint64_t leeway,
	unsigned int flags)
{
	uint64_t now, sdeadline;

	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * kevent and IOTES let you change flavor for an existing timer, so we have to
	 * support flipping flavors for enqueued thread calls.
	 */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	call->tc_flags |= THREAD_CALL_DELAYED;

	/* The soft deadline is the caller's requested deadline, before coalescing slop. */
	call->tc_soft_deadline = sdeadline = deadline;

	boolean_t ratelimited = FALSE;
	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

	/* An explicit leeway request can only widen the coalescing window. */
	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	/* Clamp so the hard deadline (soft + slop) cannot overflow. */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (ratelimited) {
		call->tc_flags |= THREAD_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
	}

	call->tc_param1 = param1;

	/* Time-to-deadline, recorded for tracing. */
	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

	bool result = _delayed_call_enqueue(call, group, deadline, flavor);

	_arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

	enable_ints_and_unlock(group, s);

	return result;
}
1336
1337 /*
1338 * Remove a callout entry from the queue
1339 * Called with thread_call_lock held
1340 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
	bool canceled;

	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
		/* Clearing the reschedule request counts as a successful cancel. */
		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
		canceled = true;

		/* if reschedule was set, it must not have been queued */
		assert(call->tc_queue == NULL);
	} else {
		bool queue_head_changed = false;

		thread_call_flavor_t flavor = thread_call_get_flavor(call);
		thread_call_group_t group = thread_call_get_group(call);

		/*
		 * If we are removing the head of a delayed queue, the hardware
		 * timer armed for that head must be updated afterwards.
		 */
		if (call->tc_pqlink.deadline != 0 &&
		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
			assert(call->tc_queue == &group->delayed_queues[flavor]);
			queue_head_changed = true;
		}

		canceled = _call_dequeue(call, group);

		if (queue_head_changed) {
			/* Re-arm for the new head; if the queue drained, cancel the timer. */
			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
				timer_call_cancel(&group->delayed_timers[flavor]);
			}
		}
	}

#if CONFIG_DTRACE
	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

	return canceled;
}
1380
1381 /*
1382 * thread_call_cancel:
1383 *
1384 * Dequeue a callout entry.
1385 *
1386 * Returns TRUE if the call was
1387 * on a queue.
1388 */
boolean_t
thread_call_cancel(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/* Does not wait for an in-flight invocation; see thread_call_cancel_wait. */
	boolean_t result = thread_call_cancel_locked(call);

	enable_ints_and_unlock(group, s);

	return result;
}
1402
1403 /*
1404 * Cancel a thread call. If it cannot be cancelled (i.e.
1405 * is already in flight), waits for the most recent invocation
1406 * to finish. Note that if clients re-submit this thread call,
1407 * it may still be pending or in flight when thread_call_cancel_wait
1408 * returns, but all requests to execute this work item prior
1409 * to the call to thread_call_cancel_wait will have finished.
1410 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	/* Waiting is only legal on calls whose storage the subsystem owns. */
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("(%p %p) thread_call_cancel_wait: can't wait on thread call whose storage I don't own",
		    call, call->tc_func);
	}

	/* We may block below; interrupt context would deadlock. */
	if (!ml_get_interrupts_enabled()) {
		panic("(%p %p) unsafe thread_call_cancel_wait",
		    call, call->tc_func);
	}

	thread_t self = current_thread();

	/* Waiting for our own in-flight invocation can never complete. */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	spl_t s = disable_ints_and_lock(group);

	boolean_t canceled = thread_call_cancel_locked(call);

	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
		/*
		 * A cancel-wait on a 'once' call will both cancel
		 * the pending call and wait for the in-flight call
		 */

		thread_call_wait_once_locked(call, s);
		/* thread call lock unlocked */
	} else {
		/*
		 * A cancel-wait on a normal call will only wait for the in-flight calls
		 * if it did not cancel the pending call.
		 *
		 * TODO: This seems less than useful - shouldn't it do the wait as well?
		 */

		if (canceled == FALSE) {
			thread_call_wait_locked(call, s);
			/* thread call lock unlocked */
		} else {
			enable_ints_and_unlock(group, s);
		}
	}

	return canceled;
}
1464
1465
1466 /*
1467 * thread_call_wake:
1468 *
1469 * Wake a call thread to service
1470 * pending call entries. May wake
1471 * the daemon thread in order to
1472 * create additional call threads.
1473 *
1474 * Called with thread_call_lock held.
1475 *
1476 * For high-priority group, only does wakeup/creation if there are no threads
1477 * running.
1478 */
static void
thread_call_wake(
	thread_call_group_t group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (group->idle_count) {
			__assert_only kern_return_t kr;

			/* Hand the work to an existing idle worker. */
			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
			assert(kr == KERN_SUCCESS);

			group->idle_count--;
			group->active_count++;

			/*
			 * No more idle workers: a pending thread-deallocation timer
			 * is now pointless, so try to cancel it.
			 */
			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
				}
			}
		} else {
			/*
			 * No idle workers: ask the daemon to create one, but only
			 * if we win the race to set the daemon-awake flag.
			 */
			if (thread_call_group_should_add_thread(group) &&
			    os_atomic_cmpxchg(&thread_call_daemon_awake,
			    false, true, relaxed)) {
				waitq_wakeup64_all(&daemon_waitq,
				    CAST_EVENT64_T(&thread_call_daemon_awake),
				    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
			}
		}
	}
}
1514
1515 /*
1516 * sched_call_thread:
1517 *
1518 * Call out invoked by the scheduler.
1519 */
static void
sched_call_thread(
	int type,
	thread_t thread)
{
	thread_call_group_t group;

	/* Only registered for thread call worker threads (see thread_call_thread). */
	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
	assert(thread->thc_state != NULL);

	group = thread->thc_state->thc_group;
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);

	thread_call_lock_spin(group);

	switch (type) {
	case SCHED_CALL_BLOCK:
		/* A worker blocked: it no longer counts as active capacity. */
		assert(group->active_count);
		--group->active_count;
		group->blocked_count++;
		/* If work is still queued, try to get another worker on it. */
		if (group->pending_count > 0) {
			thread_call_wake(group);
		}
		break;

	case SCHED_CALL_UNBLOCK:
		/* A worker resumed: move it back from blocked to active. */
		assert(group->blocked_count);
		--group->blocked_count;
		group->active_count++;
		break;
	}

	thread_call_unlock(group);
}
1555
1556 /*
1557 * Interrupts disabled, lock held; returns the same way.
1558 * Only called on thread calls whose storage we own. Wakes up
1559 * anyone who might be waiting on this work item and frees it
1560 * if the client has so requested.
1561 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
	thread_call_group_t call_group = thread_call_get_group(call);
	if (group != call_group) {
		panic("(%p %p) call finishing from wrong group: %p",
		    call, call->tc_func, call_group);
	}

	bool repend = false;
	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

	call->tc_finish_count++;

	if (!signal && alloc) {
		/* The thread call thread owns a ref until the call is finished */
		if (call->tc_refs <= 0) {
			panic("(%p %p) thread_call_finish: detected over-released thread call",
			    call, call->tc_func);
		}
		call->tc_refs--;
	}

	/* Snapshot flags, then clear the invocation-lifetime bits. */
	thread_call_flags_t old_flags = call->tc_flags;
	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

	/*
	 * Honor a re-schedule request made while the call was running,
	 * but only if the call still has references keeping it alive.
	 */
	if ((!alloc || call->tc_refs != 0) &&
	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
		assert(old_flags & THREAD_CALL_ONCE);
		thread_call_flavor_t flavor = thread_call_get_flavor(call);

		if (old_flags & THREAD_CALL_DELAYED) {
			uint64_t now = mach_absolute_time();
			if (flavor == TCF_CONTINUOUS) {
				now = absolutetime_to_continuoustime(now);
			}
			if (call->tc_soft_deadline <= now) {
				/* The deadline has already expired, go straight to pending */
				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
				call->tc_pqlink.deadline = 0;
			}
		}

		if (call->tc_pqlink.deadline) {
			/* Still in the future: requeue as a delayed call. */
			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
			if (!signal) {
				_arm_delayed_call_timer(call, group, flavor);
			}
		} else if (signal) {
			/* Signal calls are re-run by the caller's loop, not a queue. */
			call->tc_submit_count++;
			repend = true;
		} else {
			_pending_call_enqueue(call, group, mach_absolute_time());
		}
	}

	/*
	 * Last reference dropped: free the storage.  This path is only
	 * reachable for non-signal calls, so the NULL 's' passed by the
	 * timer-context (signal) caller is never dereferenced here.
	 */
	if (!signal && alloc && call->tc_refs == 0) {
		if ((old_flags & THREAD_CALL_WAIT) != 0) {
			panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
			    call, call->tc_func);
		}

		if (call->tc_finish_count != call->tc_submit_count) {
			panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
			    call, call->tc_func,
			    call->tc_submit_count, call->tc_finish_count);
		}

		if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
			panic("(%p %p) uninitialized thread call", call, call->tc_func);
		}

		call->tc_flags &= ~THREAD_CALL_INITIALIZED;

		/* zfree may block; drop the lock around it and retake after. */
		enable_ints_and_unlock(group, *s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock(group);
	}

	if ((old_flags & THREAD_CALL_WAIT) != 0) {
		/*
		 * This may wake up a thread with a registered sched_call.
		 * That call might need the group lock, so we drop the lock
		 * to avoid deadlocking.
		 *
		 * We also must use a separate waitq from the idle waitq, as
		 * this path goes waitq lock->thread lock->group lock, but
		 * the idle wait goes group lock->waitq_lock->thread_lock.
		 */
		thread_call_unlock(group);

		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);

		thread_call_lock_spin(group);
		/* THREAD_CALL_SIGNAL call may have been freed */
	}

	return repend;
}
1665
1666 /*
1667 * thread_call_invoke
1668 *
1669 * Invoke the function provided for this thread call
1670 *
1671 * Note that the thread call object can be deallocated by the function if we do not control its storage.
1672 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
	/* Trace entry (kdebug only on DEVELOPMENT/DEBUG kernels). */
#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	/* Capture tracing data before the call: func may free 'call' itself. */
	uint64_t tc_ttd = call->tc_ttd;
	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

	/* The actual client callout. */
	(*func)(param0, param1);

#if CONFIG_DTRACE
	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
1705
1706 /*
1707 * thread_call_thread:
1708 */
static void
thread_call_thread(
	thread_call_group_t group,
	wait_result_t wres)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
	}

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	spl_t s = disable_ints_and_lock(group);

	/* Per-invocation state, published via self->thc_state for observers. */
	struct thread_call_thread_state thc_state = { .thc_group = group };
	self->thc_state = &thc_state;

	/* Register for block/unblock callouts so group counts stay accurate. */
	thread_sched_call(self, sched_call_thread);

	/* Drain the pending queue, invoking each call with the lock dropped. */
	while (group->pending_count > 0) {
		thread_call_t call = qe_dequeue_head(&group->pending_queue,
		    struct thread_call, tc_qlink);
		assert(call != NULL);

		/*
		 * This thread_call_get_group is also here to validate
		 * sanity of the thing popped off the queue
		 */
		thread_call_group_t call_group = thread_call_get_group(call);
		if (group != call_group) {
			panic("(%p %p) call on pending_queue from wrong group %p",
			    call, call->tc_func, call_group);
		}

		group->pending_count--;
		if (group->pending_count == 0) {
			assert(queue_empty(&group->pending_queue));
		}

		/* Snapshot before invoking: the call may be freed by its own func. */
		thread_call_func_t func = call->tc_func;
		thread_call_param_t param0 = call->tc_param0;
		thread_call_param_t param1 = call->tc_param1;

		if (func == NULL) {
			panic("pending call with NULL func: %p", call);
		}

		call->tc_queue = NULL;

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		bool needs_finish = false;
		if (call->tc_flags & THREAD_CALL_ALLOC) {
			call->tc_refs++;        /* Delay free until we're done */
		}
		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
			/*
			 * If THREAD_CALL_ONCE is used, and the timer wasn't
			 * THREAD_CALL_ALLOC, then clients swear they will use
			 * thread_call_cancel_wait() before destroying
			 * the thread call.
			 *
			 * Else, the storage for the thread call might have
			 * disappeared when thread_call_invoke() ran.
			 */
			needs_finish = true;
			call->tc_flags |= THREAD_CALL_RUNNING;
		}

		thc_state.thc_call = call;
		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
		thc_state.thc_func = func;
		thc_state.thc_param0 = param0;
		thc_state.thc_param1 = param1;
		thc_state.thc_IOTES_invocation_timestamp = 0;

		enable_ints_and_unlock(group, s);

		thc_state.thc_call_start = mach_absolute_time();

		thread_call_invoke(func, param0, param1, call);

		thc_state.thc_call = NULL;

		/* Callouts must not return with preemption disabled. */
		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock(group);

		if (needs_finish) {
			/* Release refcount, may free, may temporarily drop lock */
			thread_call_finish(call, group, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	/* Attribute interrupt-context wakeups for power accounting. */
	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle) {
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	self->thc_state = NULL;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait %d", wres);
		}

		enable_ints_and_unlock(group, s);

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
	} else {
		/* Legacy group: only idle up to the target count; extras terminate. */
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(group, s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	enable_ints_and_unlock(group, s);

	thread_terminate(self);
	/* NOTREACHED */
}
1887
/*
 * Record the timestamp at which an IOTES invocation begins on a
 * thread call worker, for observability via thc_state.
 */
void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		/* not a thread call thread, might be a workloop IOTES */
		return;
	}

	assert(self->thc_state);
	assert(self->thc_state->thc_call == call);

	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
}
1903
1904
1905 /*
1906 * thread_call_daemon: walk list of groups, allocating
1907 * threads if appropriate (as determined by
1908 * thread_call_group_should_add_thread()).
1909 */
static void
thread_call_daemon_continue(__unused void *arg,
    __unused wait_result_t w)
{
	do {
		/* Clear the flag first; any wake after this point forces a rescan. */
		os_atomic_store(&thread_call_daemon_awake, false, relaxed);

		for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
			thread_call_group_t group = &thread_call_groups[i];

			spl_t s = disable_ints_and_lock(group);

			/* Create workers until the group has enough; creation drops the lock. */
			while (thread_call_group_should_add_thread(group)) {
				group->active_count++;

				enable_ints_and_unlock(group, s);

				thread_call_thread_create(group);

				s = disable_ints_and_lock(group);
			}

			enable_ints_and_unlock(group, s);
		}
	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));

	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);

	/*
	 * If a wakeup raced in between the scan and the assert_wait,
	 * cancel the wait so we loop around and rescan immediately.
	 */
	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	thread_block_parameter(thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}
1945
/* One-time daemon thread setup; never returns (continues in _continue). */
static void
thread_call_daemon(
	__unused void *arg,
	__unused wait_result_t w)
{
	thread_t self = current_thread();

	self->options |= TH_OPT_VMPRIV;
	vm_page_free_reserve(2);        /* XXX */

	thread_set_thread_name(self, "thread_call_daemon");

	thread_call_daemon_continue(NULL, 0);
	/* NOTREACHED */
}
1961
1962 /*
1963 * Schedule timer to deallocate a worker thread if we have a surplus
1964 * of threads (in excess of the group's target) and at least one thread
1965 * is idle the whole time.
1966 */
static void
thread_call_start_deallocate_timer(thread_call_group_t group)
{
	__assert_only bool already_enqueued;

	/* Only meaningful while at least one worker is idle, and only one timer at a time. */
	assert(group->idle_count > 0);
	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);

	group->tcg_flags |= TCG_DEALLOC_ACTIVE;

	/* Fire after a thread has been continuously idle for the dealloc interval. */
	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;

	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);

	assert(already_enqueued == false);
}
1983
1984 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t group = (thread_call_group_t) p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	/* Validate the group pointer before trusting it. */
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);

	thread_call_t call;
	uint64_t now;

	thread_call_lock_spin(group);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

	/* Expire calls in deadline order until we hit one that isn't due. */
	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * if we hit a call that isn't yet ready to expire,
		 * then we're done for now
		 * TODO: The next timer in the list could have a larger leeway
		 * and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 * Have a separate rate-limited queue to avoid this
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			/* SIGNAL calls are invoked right here in timer context. */
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				thread_call_func_t func = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				/* Invoke with the group lock dropped. */
				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			/* Hand off to the group's worker threads. */
			_pending_call_enqueue(call, group, now);
		}
	}

	/* Re-arm the hardware timer for the new queue head (if any). */
	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}
2063
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	/*
	 * Walk one flavor's entire delayed queue during a forced timer
	 * re-evaluation: expire anything already past its soft deadline,
	 * and re-sort entries whose coalescing slop exceeds the platform's
	 * resort threshold.
	 */
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	/* only valid while the platform is forcing timer re-evaluation */
	assert(ml_timer_forced_evaluation() == TRUE);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			/* already due: move straight to the pending queue */
			_pending_call_enqueue(call, group, now);
		} else {
			/* slop = hard deadline minus soft deadline */
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal soft-deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
			if (timer_resort_threshold(skew)) {
				/* requeue at the soft deadline, dropping the slop */
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	/* re-arm the hardware timer for the new earliest deadline */
	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}
2114
2115 void
thread_call_delayed_timer_rescan_all(void)2116 thread_call_delayed_timer_rescan_all(void)
2117 {
2118 for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2119 for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2120 thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2121 }
2122 }
2123 }
2124
2125 /*
2126 * Timer callback to tell a thread to terminate if
2127 * we have an excess of threads and at least one has been
2128 * idle for a long time.
2129 */
2130 static void
thread_call_dealloc_timer(timer_call_param_t p0,__unused timer_call_param_t p1)2131 thread_call_dealloc_timer(
2132 timer_call_param_t p0,
2133 __unused timer_call_param_t p1)
2134 {
2135 thread_call_group_t group = (thread_call_group_t)p0;
2136 uint64_t now;
2137 kern_return_t res;
2138 bool terminated = false;
2139
2140 thread_call_lock_spin(group);
2141
2142 assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);
2143
2144 now = mach_absolute_time();
2145
2146 if (group->idle_count > 0) {
2147 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
2148 terminated = true;
2149 group->idle_count--;
2150 res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
2151 THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT);
2152 if (res != KERN_SUCCESS) {
2153 panic("Unable to wake up idle thread for termination (%d)", res);
2154 }
2155 }
2156 }
2157
2158 group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
2159
2160 /*
2161 * If we still have an excess of threads, schedule another
2162 * invocation of this function.
2163 */
2164 if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
2165 /*
2166 * If we killed someone just now, push out the
2167 * next deadline.
2168 */
2169 if (terminated) {
2170 group->idle_timestamp = now;
2171 }
2172
2173 thread_call_start_deallocate_timer(group);
2174 }
2175
2176 thread_call_unlock(group);
2177 }
2178
2179 /*
2180 * Wait for the invocation of the thread call to complete
2181 * We know there's only one in flight because of the 'once' flag.
2182 *
2183 * If a subsequent invocation comes in before we wake up, that's OK
2184 *
2185 * TODO: Here is where we will add priority inheritance to the thread executing
2186 * the thread call in case it's lower priority than the current thread
2187 * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2188 *
2189 * Takes the thread call lock locked, returns unlocked
2190 * This lets us avoid a spurious take/drop after waking up from thread_block
2191 *
2192 * This thread could be a thread call thread itself, blocking and therefore making a
2193 * sched_call upcall into the thread call subsystem, needing the group lock.
2194 * However, we're saved from deadlock because the 'block' upcall is made in
2195 * thread_block, not in assert_wait.
2196 */
2197 static bool
thread_call_wait_once_locked(thread_call_t call,spl_t s)2198 thread_call_wait_once_locked(thread_call_t call, spl_t s)
2199 {
2200 assert(call->tc_flags & THREAD_CALL_ALLOC);
2201 assert(call->tc_flags & THREAD_CALL_ONCE);
2202
2203 thread_call_group_t group = thread_call_get_group(call);
2204
2205 if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
2206 enable_ints_and_unlock(group, s);
2207 return false;
2208 }
2209
2210 /* call is running, so we have to wait for it */
2211 call->tc_flags |= THREAD_CALL_WAIT;
2212
2213 wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
2214 if (res != THREAD_WAITING) {
2215 panic("Unable to assert wait: %d", res);
2216 }
2217
2218 enable_ints_and_unlock(group, s);
2219
2220 res = thread_block(THREAD_CONTINUE_NULL);
2221 if (res != THREAD_AWAKENED) {
2222 panic("Awoken with %d?", res);
2223 }
2224
2225 /* returns unlocked */
2226 return true;
2227 }
2228
2229 /*
2230 * Wait for an in-flight invocation to complete
2231 * Does NOT try to cancel, so the client doesn't need to hold their
2232 * lock while calling this function.
2233 *
2234 * Returns whether or not it had to wait.
2235 *
2236 * Only works for THREAD_CALL_ONCE calls.
2237 */
2238 boolean_t
thread_call_wait_once(thread_call_t call)2239 thread_call_wait_once(thread_call_t call)
2240 {
2241 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2242 panic("(%p %p) thread_call_wait_once: can't wait on thread call whose storage I don't own",
2243 call, call->tc_func);
2244 }
2245
2246 if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2247 panic("(%p %p) thread_call_wait_once: can't wait_once on a non-once call",
2248 call, call->tc_func);
2249 }
2250
2251 if (!ml_get_interrupts_enabled()) {
2252 panic("(%p %p) unsafe thread_call_wait_once",
2253 call, call->tc_func);
2254 }
2255
2256 thread_t self = current_thread();
2257
2258 if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2259 self->thc_state && self->thc_state->thc_call == call) {
2260 panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2261 call, call->tc_func);
2262 }
2263
2264 thread_call_group_t group = thread_call_get_group(call);
2265
2266 spl_t s = disable_ints_and_lock(group);
2267
2268 bool waited = thread_call_wait_once_locked(call, s);
2269 /* thread call lock unlocked */
2270
2271 return waited;
2272 }
2273
2274
2275 /*
2276 * Wait for all requested invocations of a thread call prior to now
2277 * to finish. Can only be invoked on thread calls whose storage we manage.
2278 * Just waits for the finish count to catch up to the submit count we find
2279 * at the beginning of our wait.
2280 *
2281 * Called with thread_call_lock held. Returns with lock released.
2282 */
2283 static void
thread_call_wait_locked(thread_call_t call,spl_t s)2284 thread_call_wait_locked(thread_call_t call, spl_t s)
2285 {
2286 thread_call_group_t group = thread_call_get_group(call);
2287
2288 assert(call->tc_flags & THREAD_CALL_ALLOC);
2289
2290 uint64_t submit_count = call->tc_submit_count;
2291
2292 while (call->tc_finish_count < submit_count) {
2293 call->tc_flags |= THREAD_CALL_WAIT;
2294
2295 wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
2296 CAST_EVENT64_T(call), THREAD_UNINT, 0);
2297
2298 if (res != THREAD_WAITING) {
2299 panic("Unable to assert wait: %d", res);
2300 }
2301
2302 enable_ints_and_unlock(group, s);
2303
2304 res = thread_block(THREAD_CONTINUE_NULL);
2305 if (res != THREAD_AWAKENED) {
2306 panic("Awoken with %d?", res);
2307 }
2308
2309 s = disable_ints_and_lock(group);
2310 }
2311
2312 enable_ints_and_unlock(group, s);
2313 }
2314
2315 /*
2316 * Determine whether a thread call is either on a queue or
2317 * currently being executed.
2318 */
2319 boolean_t
thread_call_isactive(thread_call_t call)2320 thread_call_isactive(thread_call_t call)
2321 {
2322 thread_call_group_t group = thread_call_get_group(call);
2323
2324 spl_t s = disable_ints_and_lock(group);
2325 boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2326 enable_ints_and_unlock(group, s);
2327
2328 return active;
2329 }
2330
2331 /*
2332 * adjust_cont_time_thread_calls
2333 * on wake, reenqueue delayed call timer for continuous time thread call groups
2334 */
2335 void
adjust_cont_time_thread_calls(void)2336 adjust_cont_time_thread_calls(void)
2337 {
2338 for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
2339 thread_call_group_t group = &thread_call_groups[i];
2340 spl_t s = disable_ints_and_lock(group);
2341
2342 /* only the continuous timers need to be re-armed */
2343
2344 _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2345 enable_ints_and_unlock(group, s);
2346 }
2347 }
2348