1 /*
2 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/waitq.h>
39 #include <kern/ledger.h>
40 #include <kern/policy_internal.h>
41
42 #include <vm/vm_pageout.h>
43
44 #include <kern/thread_call.h>
45 #include <kern/timer_call.h>
46
47 #include <libkern/OSAtomic.h>
48 #include <kern/timer_queue.h>
49
50 #include <sys/kdebug.h>
51 #if CONFIG_DTRACE
52 #include <mach/sdt.h>
53 #endif
54 #include <machine/machine_routines.h>
55
/*
 * Backing zone for dynamically allocated thread calls (thread_call_allocate*).
 * ZC_ZFREE_CLEARMEM scrubs entries on free so stale state never leaks
 * into a re-used call.
 */
static ZONE_DEFINE_TYPE(thread_call_zone, "thread_call",
    thread_call_data_t, ZC_ZFREE_CLEARMEM);
58
/*
 * Timebase "flavor" of a delayed call: TCF_ABSOLUTE deadlines are in
 * mach_absolute_time (pauses while asleep), TCF_CONTINUOUS deadlines are
 * in mach_continuous_time (keeps advancing across sleep).  TCF_COUNT sizes
 * the per-flavor arrays in struct thread_call_group.
 */
typedef enum {
	TCF_ABSOLUTE = 0,
	TCF_CONTINUOUS = 1,
	TCF_COUNT = 2,
} thread_call_flavor_t;
64
__options_decl(thread_call_group_flags_t, uint32_t, {
	TCG_NONE           = 0x0,
	TCG_PARALLEL       = 0x1, /* group may grow beyond one servicing thread */
	TCG_DEALLOC_ACTIVE = 0x2, /* dealloc_timer is currently armed */
});
70
/*
 * A thread call group is one scheduling bucket of thread calls: it owns the
 * pending/delayed queues, the pool of servicing threads, and the timers that
 * drive them.  One statically-initialized group exists per
 * THREAD_CALL_INDEX_* priority band.
 */
static struct thread_call_group {
	/* aligned to its own cache line(s) to avoid false sharing of the lock */
	__attribute__((aligned(128))) lck_ticket_t tcg_lock;

	const char * tcg_name; /* for thread names / panic messages */

	/* calls ready to run as soon as a thread is available */
	queue_head_t pending_queue;
	uint32_t pending_count;

	/* per-flavor delayed calls: FIFO queue plus deadline-ordered pqueue */
	queue_head_t delayed_queues[TCF_COUNT];
	struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
	timer_call_data_t delayed_timers[TCF_COUNT];

	/* reaps surplus idle threads (see thread_call_dealloc_timer) */
	timer_call_data_t dealloc_timer;

	struct waitq idle_waitq;
	uint64_t idle_timestamp;
	uint32_t idle_count, active_count, blocked_count;

	uint32_t tcg_thread_pri;       /* priority of servicing threads */
	uint32_t target_thread_count;  /* threads to keep around long-term */

	thread_call_group_flags_t tcg_flags;

	/* threads blocked in thread_call_wait* for a call in this group */
	struct waitq waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
	[THREAD_CALL_INDEX_HIGH] = {
		.tcg_name = "high",
		.tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
		.target_thread_count = 4,
		.tcg_flags = TCG_NONE,
	},
	[THREAD_CALL_INDEX_KERNEL] = {
		.tcg_name = "kernel",
		.tcg_thread_pri = BASEPRI_KERNEL,
		.target_thread_count = 1,
		.tcg_flags = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_USER] = {
		.tcg_name = "user",
		.tcg_thread_pri = BASEPRI_DEFAULT,
		.target_thread_count = 1,
		.tcg_flags = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_LOW] = {
		.tcg_name = "low",
		.tcg_thread_pri = MAXPRI_THROTTLE,
		.target_thread_count = 1,
		.tcg_flags = TCG_PARALLEL,
	},
	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
		.tcg_name = "kernel-high",
		.tcg_thread_pri = BASEPRI_PREEMPT,
		.target_thread_count = 2,
		.tcg_flags = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UI] = {
		.tcg_name = "qos-ui",
		.tcg_thread_pri = BASEPRI_FOREGROUND,
		.target_thread_count = 1,
		.tcg_flags = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_IN] = {
		.tcg_name = "qos-in",
		.tcg_thread_pri = BASEPRI_USER_INITIATED,
		.target_thread_count = 1,
		.tcg_flags = TCG_NONE,
	},
	[THREAD_CALL_INDEX_QOS_UT] = {
		.tcg_name = "qos-ut",
		.tcg_thread_pri = BASEPRI_UTILITY,
		.target_thread_count = 1,
		.tcg_flags = TCG_NONE,
	},
};
145
146 typedef struct thread_call_group *thread_call_group_t;
147
#define INTERNAL_CALL_COUNT             768                     /* size of the static func-only call pool */
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC)     /* 5 ms between idle-thread reap checks */
#define THREAD_CALL_ADD_RATIO           4                       /* pending:thread ratio that grows the pool */
#define THREAD_CALL_MACH_FACTOR_CAP     3                       /* don't grow pool when system load is high */
#define THREAD_CALL_GROUP_MAX_THREADS   500                     /* hard cap; exceeding it is a panic */
153
/*
 * Per-servicing-thread scratch state describing the invocation currently in
 * flight.  Lives on the servicing thread's stack; the copied-out func/params
 * let the call structure itself be freed or reused while running.
 */
struct thread_call_thread_state {
	struct thread_call_group * thc_group;
	struct thread_call *       thc_call;    /* debug only, may be deallocated */
	uint64_t  thc_call_start;
	uint64_t  thc_call_soft_deadline;
	uint64_t  thc_call_hard_deadline;
	uint64_t  thc_call_pending_timestamp;
	uint64_t  thc_IOTES_invocation_timestamp;
	thread_call_func_t  thc_func;
	thread_call_param_t thc_param0;
	thread_call_param_t thc_param1;
};
166
/* set before waking the daemon; consumed by thread_call_daemon */
static bool thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq daemon_waitq;

/* static pool backing function-only (non-allocated) thread calls */
static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t thread_call_internal_queue;
int thread_call_internal_queue_count = 0; /* free entries remaining in the pool */
static uint64_t thread_call_dealloc_interval_abs; /* DEALLOC_INTERVAL_NS in abstime units */
178
179 static void _internal_call_init(void);
180
181 static thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
182 static bool _is_internal_call(thread_call_t call);
183 static void _internal_call_release(thread_call_t call);
184 static bool _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
185 static bool _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
186 uint64_t deadline, thread_call_flavor_t flavor);
187 static bool _call_dequeue(thread_call_t call, thread_call_group_t group);
188 static void thread_call_wake(thread_call_group_t group);
189 static void thread_call_daemon(void *arg);
190 static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
191 static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
192 static void thread_call_group_setup(thread_call_group_t group);
193 static void sched_call_thread(int type, thread_t thread);
194 static void thread_call_start_deallocate_timer(thread_call_group_t group);
195 static void thread_call_wait_locked(thread_call_t call, spl_t s);
196 static bool thread_call_wait_once_locked(thread_call_t call, spl_t s);
197
198 static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
199 thread_call_func_t alt_func, thread_call_param_t alt_param0,
200 thread_call_param_t param1, uint64_t deadline,
201 uint64_t leeway, unsigned int flags);
202
203 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
204 extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
205
206 LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
207
208
/* Take the group's ticket lock (callers have interrupts disabled via splsched). */
static void
thread_call_lock_spin(thread_call_group_t group)
{
	lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}
214
/* Release the group's ticket lock. */
static void
thread_call_unlock(thread_call_group_t group)
{
	lck_ticket_unlock(&group->tcg_lock);
}
220
/* Debug-only: assert the current thread owns the group lock. */
static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
	lck_ticket_assert_owned(&group->tcg_lock);
}
226
227
/*
 * Disable interrupts, then take the group lock.
 * Returns the previous interrupt state for enable_ints_and_unlock().
 */
static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
	spl_t s = splsched();
	thread_call_lock_spin(group);

	return s;
}
236
/* Undo disable_ints_and_lock(): drop the lock, restore interrupt state. */
static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
	thread_call_unlock(group);
	splx(s);
}
243
244 /* Lock held */
/*
 * Map a call to its owning group via tc_index.
 * Lock held.
 */
static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
	thread_call_index_t index = call->tc_index;

	assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);

	return &thread_call_groups[index];
}
254
255 /* Lock held */
256 static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)257 thread_call_get_flavor(thread_call_t call)
258 {
259 return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
260 }
261
262 /* Lock held */
263 static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call,thread_call_flavor_t flavor)264 thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
265 {
266 assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
267 thread_call_flavor_t old_flavor = thread_call_get_flavor(call);
268
269 if (old_flavor != flavor) {
270 if (flavor == TCF_CONTINUOUS) {
271 call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
272 } else {
273 call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
274 }
275 }
276
277 return old_flavor;
278 }
279
280 /* returns true if it was on a queue */
281 static bool
thread_call_enqueue_tail(thread_call_t call,queue_t new_queue)282 thread_call_enqueue_tail(
283 thread_call_t call,
284 queue_t new_queue)
285 {
286 queue_t old_queue = call->tc_queue;
287
288 thread_call_group_t group = thread_call_get_group(call);
289 thread_call_flavor_t flavor = thread_call_get_flavor(call);
290
291 if (old_queue != NULL &&
292 old_queue != &group->delayed_queues[flavor]) {
293 panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
294 }
295
296 if (old_queue == &group->delayed_queues[flavor]) {
297 priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
298 }
299
300 if (old_queue == NULL) {
301 enqueue_tail(new_queue, &call->tc_qlink);
302 } else {
303 re_queue_tail(new_queue, &call->tc_qlink);
304 }
305
306 call->tc_queue = new_queue;
307
308 return old_queue != NULL;
309 }
310
/*
 * Remove a call from whatever queue it is on (pending or delayed),
 * fixing up the delayed pqueue when needed.
 *
 * Returns the queue it was removed from, or NULL if it was not queued,
 * so the caller can adjust pending_count / timers accordingly.
 * Called with thread_call_lock held.
 */
static queue_head_t *
thread_call_dequeue(
	thread_call_t call)
{
	queue_t old_queue = call->tc_queue;

	thread_call_group_t group = thread_call_get_group(call);
	thread_call_flavor_t flavor = thread_call_get_flavor(call);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	/* delayed calls are linked on both the FIFO queue and the pqueue */
	if (old_queue == &group->delayed_queues[flavor]) {
		priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue != NULL) {
		remqueue(&call->tc_qlink);

		call->tc_queue = NULL;
	}
	return old_queue;
}
337
/*
 * Place a call on the delayed queue of the given flavor with the given
 * deadline, switching flavor and migrating queues as necessary.
 *
 * The call may come from: nowhere (fresh arm), the pending queue, or a
 * delayed queue of either flavor.  Returns the queue it came from (NULL if
 * unqueued) so the caller can maintain counts and re-arm timers.
 *
 * Called with thread_call_lock held.
 */
static queue_head_t *
thread_call_enqueue_deadline(
	thread_call_t call,
	thread_call_group_t group,
	thread_call_flavor_t flavor,
	uint64_t deadline)
{
	queue_t old_queue = call->tc_queue;
	queue_t new_queue = &group->delayed_queues[flavor];

	/* may flip THREAD_CALL_FLAG_CONTINUOUS; old_flavor selects the old pqueue */
	thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

	if (old_queue != NULL &&
	    old_queue != &group->pending_queue &&
	    old_queue != &group->delayed_queues[old_flavor]) {
		panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
	}

	if (old_queue == new_queue) {
		/* optimize the same-queue case to avoid a full re-insert */
		uint64_t old_deadline = call->tc_pqlink.deadline;
		call->tc_pqlink.deadline = deadline;

		/* deadline-min pqueue: pick the cheaper sift direction */
		if (old_deadline < deadline) {
			priority_queue_entry_increased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		} else {
			priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
			    &call->tc_pqlink);
		}
	} else {
		if (old_queue == &group->delayed_queues[old_flavor]) {
			priority_queue_remove(&group->delayed_pqueues[old_flavor],
			    &call->tc_pqlink);
		}

		call->tc_pqlink.deadline = deadline;

		priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
	}

	if (old_queue == NULL) {
		enqueue_tail(new_queue, &call->tc_qlink);
	} else if (old_queue != new_queue) {
		re_queue_tail(new_queue, &call->tc_qlink);
	}

	call->tc_queue = new_queue;

	return old_queue;
}
389
/*
 * Return the call's current hard deadline.
 * NOTE(review): read without the group lock here — presumably callers
 * tolerate a racy snapshot; confirm against call sites.
 */
uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
	return call->tc_pqlink.deadline;
}
395
396
397 static bool
group_isparallel(thread_call_group_t group)398 group_isparallel(thread_call_group_t group)
399 {
400 return (group->tcg_flags & TCG_PARALLEL) != 0;
401 }
402
/*
 * Decide whether the daemon should spawn another servicing thread for this
 * group.  Serial groups get at most one active thread; parallel groups grow
 * based on backlog, target size, and overall system load.
 *
 * Panics if the group has somehow reached the absolute thread cap.
 * Called with thread_call_lock held.
 */
static bool
thread_call_group_should_add_thread(thread_call_group_t group)
{
	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
		panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
		    group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
		    group->active_count, group->blocked_count, group->idle_count);
	}

	if (group_isparallel(group) == false) {
		/* serial group: one worker at a time, spawn only if none active */
		if (group->pending_count > 0 && group->active_count == 0) {
			return true;
		}

		return false;
	}

	if (group->pending_count > 0) {
		/* an idle worker will pick up the work; no need to spawn */
		if (group->idle_count > 0) {
			return false;
		}

		uint32_t thread_count = group->active_count;

		/*
		 * Add a thread if either there are no threads,
		 * the group has fewer than its target number of
		 * threads, or the amount of work is large relative
		 * to the number of threads.  In the last case, pay attention
		 * to the total load on the system, and back off if
		 * it's high.
		 */
		if ((thread_count == 0) ||
		    (thread_count < group->target_thread_count) ||
		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
		    (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
			return true;
		}
	}

	return false;
}
445
/*
 * One-time initialization of a group's lock, queues, pqueues, and timers.
 * Called once per group during thread_call_initialize().
 */
static void
thread_call_group_setup(thread_call_group_t group)
{
	lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

	queue_init(&group->pending_queue);

	for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
		queue_init(&group->delayed_queues[flavor]);
		priority_queue_init(&group->delayed_pqueues[flavor]);
		timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
	}

	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

	waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

	/* Reverse the wait order so we re-use the most recently parked thread from the pool */
	waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
}
466
467 /*
468 * Simple wrapper for creating threads bound to
469 * thread call groups.
470 */
/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 *
 * Spawns one servicing thread at the group's priority, names it after the
 * group and its current thread count, and drops the creation reference.
 * Panics on failure (thread calls are a core kernel service).
 */
static void
thread_call_thread_create(
	thread_call_group_t group)
{
	thread_t thread;
	kern_return_t result;

	int thread_pri = group->tcg_thread_pri;

	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
	    group, thread_pri, &thread);
	if (result != KERN_SUCCESS) {
		panic("cannot create new thread call thread %d", result);
	}

	if (thread_pri <= BASEPRI_KERNEL) {
		/*
		 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
		 * in kernel if there are higher priority threads available.
		 */
		thread_set_eager_preempt(thread);
	}

	char name[MAXTHREADNAMESIZE] = "";

	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
	thread_set_thread_name(thread, name);

	thread_deallocate(thread);
}
503
504 /*
505 * thread_call_initialize:
506 *
507 * Initialize this module, called
508 * early during system initialization.
509 */
/*
 * thread_call_initialize:
 *
 * Initialize this module, called
 * early during system initialization.
 *
 * Sets up the dealloc interval, the daemon waitq, every group, and the
 * internal call pool, then launches the daemon thread that grows the
 * per-group thread pools.
 */
__startup_func
static void
thread_call_initialize(void)
{
	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
	waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

	for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
		thread_call_group_setup(&thread_call_groups[i]);
	}

	_internal_call_init();

	thread_t thread;
	kern_return_t result;

	/* daemon outranks every group's workers so it can always spawn them */
	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
	    NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
	if (result != KERN_SUCCESS) {
		panic("thread_call_initialize");
	}

	thread_deallocate(thread);
}
STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);
535
/*
 * Initialize a caller-provided thread call structure with a function,
 * argument, priority band, and option flags.  Does not allocate; see
 * thread_call_allocate_with_options() for the allocating variant.
 *
 * Panics on an unknown priority value.
 */
void
thread_call_setup_with_options(
	thread_call_t call,
	thread_call_func_t func,
	thread_call_param_t param0,
	thread_call_priority_t pri,
	thread_call_options_t options)
{
	/*
	 * NOTE(review): bzero looks redundant next to the compound-literal
	 * assignment below, but it also zeroes padding bytes — presumably
	 * deliberate; confirm before removing.
	 */
	bzero(call, sizeof(*call));

	*call = (struct thread_call) {
		.tc_func = func,
		.tc_param0 = param0,
	};

	/* map the public priority to the backing group index */
	switch (pri) {
	case THREAD_CALL_PRIORITY_HIGH:
		call->tc_index = THREAD_CALL_INDEX_HIGH;
		break;
	case THREAD_CALL_PRIORITY_KERNEL:
		call->tc_index = THREAD_CALL_INDEX_KERNEL;
		break;
	case THREAD_CALL_PRIORITY_USER:
		call->tc_index = THREAD_CALL_INDEX_USER;
		break;
	case THREAD_CALL_PRIORITY_LOW:
		call->tc_index = THREAD_CALL_INDEX_LOW;
		break;
	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
		call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
		break;
	default:
		panic("Invalid thread call pri value: %d", pri);
		break;
	}

	if (options & THREAD_CALL_OPTIONS_ONCE) {
		call->tc_flags |= THREAD_CALL_ONCE;
	}
	/* SIGNAL implies ONCE: signal-mode calls fire from timer context */
	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
		call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
	}
}
579
/*
 * Legacy setup entry point: initialize a thread call at the default
 * HIGH priority with no options.
 */
void
thread_call_setup(
	thread_call_t call,
	thread_call_func_t func,
	thread_call_param_t param0)
{
	thread_call_setup_with_options(call, func, param0,
	    THREAD_CALL_PRIORITY_HIGH, 0);
}
589
/*
 * Populate the free list of internal (function-only) thread calls from the
 * static pool.  Runs once at startup, under the HIGH group's lock since
 * that group owns all internal calls.
 */
static void
_internal_call_init(void)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	queue_init(&thread_call_internal_queue);

	for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
		enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
		thread_call_internal_queue_count++;
	}

	enable_ints_and_unlock(group, s);
}
607
608 /*
609 * _internal_call_allocate:
610 *
611 * Allocate an internal callout entry.
612 *
613 * Called with thread_call_lock held.
614 */
/*
 * _internal_call_allocate:
 *
 * Allocate an internal callout entry.
 *
 * Pops a free entry from the internal pool and initializes it with
 * func/param0 at HIGH priority.  Panics if the pool is exhausted
 * (callers leaking func-only calls, or the pool is undersized).
 *
 * Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
	    struct thread_call, tc_qlink);

	if (call == NULL) {
		panic("_internal_call_allocate: thread_call_internal_queue empty");
	}

	thread_call_internal_queue_count--;

	thread_call_setup(call, func, param0);
	/* THREAD_CALL_ALLOC not set, do not free back to zone */
	assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
	enable_ints_and_unlock(group, s);

	return call;
}
639
640 /* Check if a call is internal and needs to be returned to the internal pool. */
641 static bool
_is_internal_call(thread_call_t call)642 _is_internal_call(thread_call_t call)
643 {
644 if (call >= internal_call_storage &&
645 call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
646 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
647 return true;
648 }
649 return false;
650 }
651
652 /*
653 * _internal_call_release:
654 *
655 * Release an internal callout entry which
656 * is no longer pending (or delayed).
657 *
658 * Called with thread_call_lock held.
659 */
/*
 * _internal_call_release:
 *
 * Release an internal callout entry which
 * is no longer pending (or delayed).
 *
 * Pushes the entry back on the head of the free list (LIFO, keeps the
 * working set warm).
 *
 * Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
	assert(_is_internal_call(call));

	thread_call_group_t group = thread_call_get_group(call);

	/* internal calls only ever belong to the HIGH group */
	assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
	thread_call_assert_locked(group);

	enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
	thread_call_internal_queue_count++;
}
673
674 /*
675 * _pending_call_enqueue:
676 *
677 * Place an entry at the end of the
678 * pending queue, to be executed soon.
679 *
680 * Returns TRUE if the entry was already
681 * on a queue.
682 *
683 * Called with thread_call_lock held.
684 */
/*
 * _pending_call_enqueue:
 *
 * Place an entry at the end of the
 * pending queue, to be executed soon.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
	/*
	 * A ONCE call that is currently executing cannot be re-queued directly:
	 * mark it RESCHEDULE instead and let the servicing thread re-enqueue
	 * it when the invocation finishes.  deadline 0 == "pending" for the
	 * eventual re-enqueue.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = 0;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);

		/* "was scheduled" == RESCHEDULE was already set before this arm */
		return flags & THREAD_CALL_RESCHEDULE;
	}

	call->tc_pending_timestamp = now;

	bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

	if (!was_on_queue) {
		call->tc_submit_count++;
	}

	group->pending_count++;

	/* kick an idle worker (or the daemon) to service the backlog */
	thread_call_wake(group);

	return was_on_queue;
}
716
717 /*
718 * _delayed_call_enqueue:
719 *
720 * Place an entry on the delayed queue,
721 * after existing entries with an earlier
722 * (or identical) deadline.
723 *
724 * Returns TRUE if the entry was already
725 * on a queue.
726 *
727 * Called with thread_call_lock held.
728 */
/*
 * _delayed_call_enqueue:
 *
 * Place an entry on the delayed queue,
 * after existing entries with an earlier
 * (or identical) deadline.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
	thread_call_t call,
	thread_call_group_t group,
	uint64_t deadline,
	thread_call_flavor_t flavor)
{
	/*
	 * ONCE call mid-execution: record the new deadline/flavor and set
	 * RESCHEDULE; the servicing thread re-enqueues it on completion.
	 */
	if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
	    == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
		call->tc_pqlink.deadline = deadline;

		thread_call_flags_t flags = call->tc_flags;
		call->tc_flags |= THREAD_CALL_RESCHEDULE;

		assert(call->tc_queue == NULL);
		thread_call_set_flavor(call, flavor);

		/* "was scheduled" == RESCHEDULE was already set before this arm */
		return flags & THREAD_CALL_RESCHEDULE;
	}

	queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	} else if (old_queue == NULL) {
		call->tc_submit_count++;
	}

	return old_queue != NULL;
}
759
760 /*
761 * _call_dequeue:
762 *
763 * Remove an entry from a queue.
764 *
765 * Returns TRUE if the entry was on a queue.
766 *
767 * Called with thread_call_lock held.
768 */
/*
 * _call_dequeue:
 *
 * Remove an entry from a queue.
 *
 * Returns TRUE if the entry was on a queue.
 *
 * Bumps tc_finish_count since a dequeued call will never be invoked
 * for this submission.
 *
 * Called with thread_call_lock held.
 */
static bool
_call_dequeue(
	thread_call_t call,
	thread_call_group_t group)
{
	queue_head_t *old_queue = thread_call_dequeue(call);

	if (old_queue == NULL) {
		return false;
	}

	call->tc_finish_count++;

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	}

	return true;
}
788
789 /*
790 * _arm_delayed_call_timer:
791 *
792 * Check if the timer needs to be armed for this flavor,
793 * and if so, arm it.
794 *
795 * If call is non-NULL, only re-arm the timer if the specified call
796 * is the first in the queue.
797 *
798 * Returns true if the timer was armed/re-armed, false if it was left unset
799 * Caller should cancel the timer if need be.
800 *
801 * Called with thread_call_lock held.
802 */
/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t new_call,
    thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	/* No calls implies no timer needed */
	if (queue_empty(&group->delayed_queues[flavor])) {
		return false;
	}

	/* earliest-deadline call for this flavor drives the hardware timer */
	thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

	/* We only need to change the hard timer if this new call is the first in the list */
	if (new_call != NULL && new_call != call) {
		return false;
	}

	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

	/* fire at the soft deadline; leeway below covers the soft-to-hard window */
	uint64_t fire_at = call->tc_soft_deadline;

	if (flavor == TCF_CONTINUOUS) {
		assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
		/* timer_call deadlines are absolute-time; convert the continuous deadline */
		fire_at = continuoustime_to_absolutetime(fire_at);
	} else {
		assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
	}

	/*
	 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
	 * which does not take into account later-deadline timers with a larger leeway.
	 * This is a valid coalescing behavior, but masks a possible window to
	 * fire a timer instead of going idle.
	 */
	uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
	    fire_at, leeway,
	    TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

	return true;
}
846
847 /*
848 * _cancel_func_from_queue:
849 *
850 * Remove the first (or all) matching
851 * entries from the specified queue.
852 *
853 * Returns TRUE if any matching entries
854 * were found.
855 *
856 * Called with thread_call_lock held.
857 */
/*
 * _cancel_func_from_queue:
 *
 * Remove the first (or all) matching
 * entries from the specified queue.
 *
 * A match requires both the function and param0 to be equal.
 * Internal (pool) calls are returned to the free list.
 *
 * Returns TRUE if any matching entries
 * were found.
 *
 * Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_group_t group,
    boolean_t remove_all,
    queue_head_t *queue)
{
	boolean_t call_removed = FALSE;
	thread_call_t call;

	/* safe variant: we unlink elements while iterating */
	qe_foreach_element_safe(call, queue, tc_qlink) {
		if (call->tc_func != func ||
		    call->tc_param0 != param0) {
			continue;
		}

		_call_dequeue(call, group);

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		call_removed = TRUE;
		if (!remove_all) {
			break;
		}
	}

	return call_removed;
}
888
889 /*
890 * thread_call_func_delayed:
891 *
892 * Enqueue a function callout to
893 * occur at the stated time.
894 */
/*
 * thread_call_func_delayed:
 *
 * Enqueue a function callout to
 * occur at the stated time.
 *
 * Uses an internal pool entry (no caller-owned thread_call);
 * deadline is absolute time, no leeway.
 */
void
thread_call_func_delayed(
	thread_call_func_t func,
	thread_call_param_t param,
	uint64_t deadline)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}
903
904 /*
905 * thread_call_func_delayed_with_leeway:
906 *
907 * Same as thread_call_func_delayed(), but with
908 * leeway/flags threaded through.
909 */
910
/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */

void
thread_call_func_delayed_with_leeway(
	thread_call_func_t func,
	thread_call_param_t param,
	uint64_t deadline,
	uint64_t leeway,
	uint32_t flags)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}
921
922 /*
923 * thread_call_func_cancel:
924 *
925 * Dequeue a function callout.
926 *
927 * Removes one (or all) { function, argument }
928 * instance(s) from either (or both)
929 * the pending and the delayed queue,
930 * in that order.
931 *
932 * Returns TRUE if any calls were cancelled.
933 *
934 * This iterates all of the pending or delayed thread calls in the group,
935 * which is really inefficient. Switch to an allocated thread call instead.
936 *
937 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
938 * affect the main 'high' group.
939 */
/*
 * thread_call_func_cancel:
 *
 * Dequeue a function callout.
 *
 * Removes one (or all) { function, argument }
 * instance(s) from either (or both)
 * the pending and the delayed queue,
 * in that order.
 *
 * Returns TRUE if any calls were cancelled.
 *
 * This iterates all of the pending or delayed thread calls in the group,
 * which is really inefficient. Switch to an allocated thread call instead.
 *
 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
 * affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
	thread_call_func_t func,
	thread_call_param_t param,
	boolean_t cancel_all)
{
	boolean_t result;

	assert(func != NULL);

	/* Function-only thread calls are only kept in the default HIGH group */
	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

	spl_t s = disable_ints_and_lock(group);

	if (cancel_all) {
		/* exhaustively search every queue, and return true if any search found something */
		/* note non-short-circuit '|' so every queue is scanned */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	} else {
		/* early-exit as soon as we find something, don't search other queues */
		result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
		    _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
	}

	enable_ints_and_unlock(group, s);

	return result;
}
971
972 /*
973 * Allocate a thread call with a given priority. Importances other than
974 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
975 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
976 * threads which are not in the normal "urgent" bands).
977 */
/*
 * Allocate a thread call with a given priority. Importances other than
 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 * threads which are not in the normal "urgent" bands).
 *
 * Convenience wrapper: same as thread_call_allocate_with_options() with no options.
 */
thread_call_t
thread_call_allocate_with_priority(
	thread_call_func_t func,
	thread_call_param_t param0,
	thread_call_priority_t pri)
{
	return thread_call_allocate_with_options(func, param0, pri, 0);
}
986
/*
 * Allocate and initialize a thread call from the zone.  The returned call
 * carries one reference and the THREAD_CALL_ALLOC flag, so it is freed back
 * to the zone (not the internal pool) when the last reference drops.
 */
thread_call_t
thread_call_allocate_with_options(
	thread_call_func_t func,
	thread_call_param_t param0,
	thread_call_priority_t pri,
	thread_call_options_t options)
{
	thread_call_t call = zalloc(thread_call_zone);

	thread_call_setup_with_options(call, func, param0, pri, options);
	call->tc_refs = 1;
	call->tc_flags |= THREAD_CALL_ALLOC;

	return call;
}
1002
/*
 * Allocate a thread call whose servicing group is chosen by QoS tier rather
 * than legacy priority.  Accepts THREAD_CALL_OPTIONS_ONCE; does not support
 * THREAD_CALL_OPTIONS_SIGNAL.  Panics on an unknown tier.
 */
thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,
    thread_call_param_t param0,
    int qos_tier,
    thread_call_options_t options)
{
	/* allocate at HIGH, then retarget the group index per QoS tier */
	thread_call_t call = thread_call_allocate(func, param0);

	switch (qos_tier) {
	case THREAD_QOS_UNSPECIFIED:
		call->tc_index = THREAD_CALL_INDEX_HIGH;
		break;
	case THREAD_QOS_LEGACY:
		call->tc_index = THREAD_CALL_INDEX_USER;
		break;
	case THREAD_QOS_MAINTENANCE:
	case THREAD_QOS_BACKGROUND:
		call->tc_index = THREAD_CALL_INDEX_LOW;
		break;
	case THREAD_QOS_UTILITY:
		call->tc_index = THREAD_CALL_INDEX_QOS_UT;
		break;
	case THREAD_QOS_USER_INITIATED:
		call->tc_index = THREAD_CALL_INDEX_QOS_IN;
		break;
	case THREAD_QOS_USER_INTERACTIVE:
		call->tc_index = THREAD_CALL_INDEX_QOS_UI;
		break;
	default:
		panic("Invalid thread call qos value: %d", qos_tier);
		break;
	}

	if (options & THREAD_CALL_OPTIONS_ONCE) {
		call->tc_flags |= THREAD_CALL_ONCE;
	}

	/* does not support THREAD_CALL_OPTIONS_SIGNAL */

	return call;
}
1044
1045
1046 /*
1047 * thread_call_allocate:
1048 *
1049 * Allocate a callout entry.
1050 */
1051 thread_call_t
thread_call_allocate(thread_call_func_t func,thread_call_param_t param0)1052 thread_call_allocate(
1053 thread_call_func_t func,
1054 thread_call_param_t param0)
1055 {
1056 return thread_call_allocate_with_options(func, param0,
1057 THREAD_CALL_PRIORITY_HIGH, 0);
1058 }
1059
1060 /*
1061 * thread_call_free:
1062 *
1063 * Release a callout. If the callout is currently
1064 * executing, it will be freed when all invocations
1065 * finish.
1066 *
1067 * If the callout is currently armed to fire again, then
1068 * freeing is not allowed and returns FALSE. The
1069 * client must have canceled the pending invocation before freeing.
1070 */
boolean_t
thread_call_free(
	thread_call_t           call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * Refuse to free a call that is still enqueued, or one marked
	 * for re-scheduling when its current invocation finishes:
	 * the client must cancel it first.
	 */
	if (call->tc_queue != NULL ||
	    ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
		thread_call_unlock(group);
		splx(s);

		return FALSE;
	}

	/* Drop the caller's reference taken at allocation. */
	int32_t refs = --call->tc_refs;
	if (refs < 0) {
		panic("Refcount negative: %d", refs);
	}

	/*
	 * A SIGNAL-flavor call that is still RUNNING is waited for here,
	 * so its in-flight invocation completes before the storage goes away.
	 */
	if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
	    == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
		thread_call_wait_once_locked(call, s);
		/* thread call lock has been unlocked */
	} else {
		enable_ints_and_unlock(group, s);
	}

	/* Last reference dropped: reclaim the zone storage. */
	if (refs == 0) {
		assert(call->tc_finish_count == call->tc_submit_count);
		zfree(thread_call_zone, call);
	}

	return TRUE;
}
1107
1108 /*
1109 * thread_call_enter:
1110 *
1111 * Enqueue a callout entry to occur "soon".
1112 *
1113 * Returns TRUE if the call was
1114 * already on a queue.
1115 */
1116 boolean_t
thread_call_enter(thread_call_t call)1117 thread_call_enter(
1118 thread_call_t call)
1119 {
1120 return thread_call_enter1(call, 0);
1121 }
1122
boolean_t
thread_call_enter1(
	thread_call_t                   call,
	thread_call_param_t             param1)
{
	assert(call->tc_func != NULL);
	/* SIGNAL-flavor calls are invoked from timer context; they are never enqueued here. */
	assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

	thread_call_group_t group = thread_call_get_group(call);
	bool result = true;

	spl_t s = disable_ints_and_lock(group);

	/* Only enqueue if not already pending; return value reports prior queued state. */
	if (call->tc_queue != &group->pending_queue) {
		result = _pending_call_enqueue(call, group, mach_absolute_time());
	}

	/* param1 is updated even when the call was already pending. */
	call->tc_param1 = param1;

	enable_ints_and_unlock(group, s);

	return result;
}
1146
1147 /*
1148 * thread_call_enter_delayed:
1149 *
1150 * Enqueue a callout entry to occur
1151 * at the stated time.
1152 *
1153 * Returns TRUE if the call was
1154 * already on a queue.
1155 */
1156 boolean_t
thread_call_enter_delayed(thread_call_t call,uint64_t deadline)1157 thread_call_enter_delayed(
1158 thread_call_t call,
1159 uint64_t deadline)
1160 {
1161 assert(call != NULL);
1162 return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
1163 }
1164
1165 boolean_t
thread_call_enter1_delayed(thread_call_t call,thread_call_param_t param1,uint64_t deadline)1166 thread_call_enter1_delayed(
1167 thread_call_t call,
1168 thread_call_param_t param1,
1169 uint64_t deadline)
1170 {
1171 assert(call != NULL);
1172 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
1173 }
1174
1175 boolean_t
thread_call_enter_delayed_with_leeway(thread_call_t call,thread_call_param_t param1,uint64_t deadline,uint64_t leeway,unsigned int flags)1176 thread_call_enter_delayed_with_leeway(
1177 thread_call_t call,
1178 thread_call_param_t param1,
1179 uint64_t deadline,
1180 uint64_t leeway,
1181 unsigned int flags)
1182 {
1183 assert(call != NULL);
1184 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
1185 }
1186
1187
1188 /*
1189 * thread_call_enter_delayed_internal:
1190 * enqueue a callout entry to occur at the stated time
1191 *
1192 * Returns True if the call was already on a queue
1193 * params:
1194 * call - structure encapsulating state of the callout
1195 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
1196 * deadline - time deadline in nanoseconds
1197 * leeway - timer slack represented as delta of deadline.
1198 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1199 * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1200 * THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1201 * than mach_absolute_time
1202 */
boolean_t
thread_call_enter_delayed_internal(
	thread_call_t           call,
	thread_call_func_t      alt_func,
	thread_call_param_t     alt_param0,
	thread_call_param_t     param1,
	uint64_t                deadline,
	uint64_t                leeway,
	unsigned int            flags)
{
	uint64_t now, sdeadline;

	/* Continuous-flavor calls are measured against mach_continuous_time. */
	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	assert(call->tc_func != NULL);
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	/*
	 * kevent and IOTES let you change flavor for an existing timer, so we have to
	 * support flipping flavors for enqueued thread calls.
	 */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	call->tc_flags |= THREAD_CALL_DELAYED;

	/* The soft deadline is the caller-requested fire time, before coalescing slop. */
	call->tc_soft_deadline = sdeadline = deadline;

	/* Ask the platform layer how much slop this deadline/urgency combination allows. */
	boolean_t ratelimited = FALSE;
	uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	/* Hard deadline = soft deadline + slop, saturating rather than wrapping. */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (ratelimited) {
		call->tc_flags |= THREAD_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~THREAD_CALL_RATELIMITED;
	}

	call->tc_param1 = param1;

	/* Time-to-deadline, recorded for tracing. */
	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

	bool result = _delayed_call_enqueue(call, group, deadline, flavor);

	_arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
	    uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
	    (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

	enable_ints_and_unlock(group, s);

	return result;
}
1281
1282 /*
1283 * Remove a callout entry from the queue
1284 * Called with thread_call_lock held
1285 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
	bool canceled;

	if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
		/* Clearing RESCHEDULE suppresses the pending re-enqueue at finish time. */
		call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
		canceled = true;

		/* if reschedule was set, it must not have been queued */
		assert(call->tc_queue == NULL);
	} else {
		bool queue_head_changed = false;

		thread_call_flavor_t flavor = thread_call_get_flavor(call);
		thread_call_group_t group = thread_call_get_group(call);

		/*
		 * If this call is the soonest delayed entry for its flavor,
		 * removing it changes what the delayed timer should fire for.
		 */
		if (call->tc_pqlink.deadline != 0 &&
		    call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
			assert(call->tc_queue == &group->delayed_queues[flavor]);
			queue_head_changed = true;
		}

		canceled = _call_dequeue(call, group);

		if (queue_head_changed) {
			/* Re-arm for the new head; if the queue is now empty, cancel the timer. */
			if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
				timer_call_cancel(&group->delayed_timers[flavor]);
			}
		}
	}

#if CONFIG_DTRACE
	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
	    0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

	return canceled;
}
1325
1326 /*
1327 * thread_call_cancel:
1328 *
1329 * Dequeue a callout entry.
1330 *
1331 * Returns TRUE if the call was
1332 * on a queue.
1333 */
1334 boolean_t
thread_call_cancel(thread_call_t call)1335 thread_call_cancel(thread_call_t call)
1336 {
1337 thread_call_group_t group = thread_call_get_group(call);
1338
1339 spl_t s = disable_ints_and_lock(group);
1340
1341 boolean_t result = thread_call_cancel_locked(call);
1342
1343 enable_ints_and_unlock(group, s);
1344
1345 return result;
1346 }
1347
1348 /*
1349 * Cancel a thread call. If it cannot be cancelled (i.e.
1350 * is already in flight), waits for the most recent invocation
1351 * to finish. Note that if clients re-submit this thread call,
1352 * it may still be pending or in flight when thread_call_cancel_wait
1353 * returns, but all requests to execute this work item prior
1354 * to the call to thread_call_cancel_wait will have finished.
1355 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	/* Waiting is only legal for calls whose storage this subsystem owns. */
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
	}

	/* We may block below, which is unsafe with interrupts disabled. */
	if (!ml_get_interrupts_enabled()) {
		panic("unsafe thread_call_cancel_wait");
	}

	thread_t self = current_thread();

	/* A callout thread waiting on the very call it is executing would deadlock. */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	spl_t s = disable_ints_and_lock(group);

	boolean_t canceled = thread_call_cancel_locked(call);

	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
		/*
		 * A cancel-wait on a 'once' call will both cancel
		 * the pending call and wait for the in-flight call
		 */

		thread_call_wait_once_locked(call, s);
		/* thread call lock unlocked */
	} else {
		/*
		 * A cancel-wait on a normal call will only wait for the in-flight calls
		 * if it did not cancel the pending call.
		 *
		 * TODO: This seems less than useful - shouldn't it do the wait as well?
		 */

		if (canceled == FALSE) {
			thread_call_wait_locked(call, s);
			/* thread call lock unlocked */
		} else {
			enable_ints_and_unlock(group, s);
		}
	}

	return canceled;
}
1407
1408
1409 /*
1410 * thread_call_wake:
1411 *
1412 * Wake a call thread to service
1413 * pending call entries. May wake
1414 * the daemon thread in order to
1415 * create additional call threads.
1416 *
1417 * Called with thread_call_lock held.
1418 *
1419 * For high-priority group, only does wakeup/creation if there are no threads
1420 * running.
1421 */
static void
thread_call_wake(
	thread_call_group_t             group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (group->idle_count) {
			__assert_only kern_return_t kr;

			/* An idle worker exists: wake exactly one to service the queue. */
			kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			assert(kr == KERN_SUCCESS);

			group->idle_count--;
			group->active_count++;

			/* No idle threads remain, so the dealloc timer has nothing to reap. */
			if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
				if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
					group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
				}
			}
		} else {
			/*
			 * No idle workers: ask the daemon to create one.
			 * The cmpxchg ensures the daemon is woken at most once per quiescence.
			 */
			if (thread_call_group_should_add_thread(group) &&
			    os_atomic_cmpxchg(&thread_call_daemon_awake,
			    false, true, relaxed)) {
				waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
				    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	}
}
1456
1457 /*
1458 * sched_call_thread:
1459 *
1460 * Call out invoked by the scheduler.
1461 */
static void
sched_call_thread(
	int                             type,
	thread_t                        thread)
{
	thread_call_group_t             group;

	assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
	assert(thread->thc_state != NULL);

	group = thread->thc_state->thc_group;
	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);

	thread_call_lock_spin(group);

	switch (type) {
	case SCHED_CALL_BLOCK:
		/* A worker blocked: if work is still pending, wake another worker. */
		assert(group->active_count);
		--group->active_count;
		group->blocked_count++;
		if (group->pending_count > 0) {
			thread_call_wake(group);
		}
		break;

	case SCHED_CALL_UNBLOCK:
		/* A worker resumed: move it back from blocked to active. */
		assert(group->blocked_count);
		--group->blocked_count;
		group->active_count++;
		break;
	}

	thread_call_unlock(group);
}
1496
1497 /*
1498 * Interrupts disabled, lock held; returns the same way.
1499 * Only called on thread calls whose storage we own. Wakes up
1500 * anyone who might be waiting on this work item and frees it
1501 * if the client has so requested.
1502 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
	assert(thread_call_get_group(call) == group);

	bool repend = false;
	bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
	bool alloc = call->tc_flags & THREAD_CALL_ALLOC;

	call->tc_finish_count++;

	if (!signal && alloc) {
		/* The thread call thread owns a ref until the call is finished */
		if (call->tc_refs <= 0) {
			panic("thread_call_finish: detected over-released thread call: %p", call);
		}
		call->tc_refs--;
	}

	/* Snapshot flags before clearing the per-invocation state bits. */
	thread_call_flags_t old_flags = call->tc_flags;
	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

	/*
	 * If a re-schedule request arrived while the call was running
	 * (THREAD_CALL_RESCHEDULE), re-enqueue it now — unless the call
	 * is about to be freed below.
	 */
	if ((!alloc || call->tc_refs != 0) &&
	    (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
		assert(old_flags & THREAD_CALL_ONCE);
		thread_call_flavor_t flavor = thread_call_get_flavor(call);

		if (old_flags & THREAD_CALL_DELAYED) {
			uint64_t now = mach_absolute_time();
			if (flavor == TCF_CONTINUOUS) {
				now = absolutetime_to_continuoustime(now);
			}
			if (call->tc_soft_deadline <= now) {
				/* The deadline has already expired, go straight to pending */
				call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
				call->tc_pqlink.deadline = 0;
			}
		}

		if (call->tc_pqlink.deadline) {
			_delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);
			if (!signal) {
				_arm_delayed_call_timer(call, group, flavor);
			}
		} else if (signal) {
			/* SIGNAL calls are re-run by the caller's loop rather than re-queued. */
			call->tc_submit_count++;
			repend = true;
		} else {
			_pending_call_enqueue(call, group, mach_absolute_time());
		}
	}

	/*
	 * Last reference dropped: free the zone storage.
	 * NOTE(review): thread_call_delayed_timer passes s == NULL on the
	 * SIGNAL path; *s is only dereferenced here, which is unreachable
	 * when 'signal' is set — confirm this invariant holds for all callers.
	 */
	if (!signal && alloc && call->tc_refs == 0) {
		if ((old_flags & THREAD_CALL_WAIT) != 0) {
			panic("Someone waiting on a thread call that is scheduled for free: %p", call->tc_func);
		}

		assert(call->tc_finish_count == call->tc_submit_count);

		/* Drop the group lock (and re-enable interrupts) across the zfree. */
		enable_ints_and_unlock(group, *s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock(group);
	}

	if ((old_flags & THREAD_CALL_WAIT) != 0) {
		/*
		 * This may wake up a thread with a registered sched_call.
		 * That call might need the group lock, so we drop the lock
		 * to avoid deadlocking.
		 *
		 * We also must use a separate waitq from the idle waitq, as
		 * this path goes waitq lock->thread lock->group lock, but
		 * the idle wait goes group lock->waitq_lock->thread_lock.
		 */
		thread_call_unlock(group);

		waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);

		thread_call_lock_spin(group);
		/* THREAD_CALL_SIGNAL call may have been freed */
	}

	return repend;
}
1590
1591 /*
1592 * thread_call_invoke
1593 *
1594 * Invoke the function provided for this thread call
1595 *
1596 * Note that the thread call object can be deallocated by the function if we do not control its storage.
1597 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
		VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	/* Capture tracing state up front: 'call' may be deallocated by func. */
	uint64_t tc_ttd = call->tc_ttd;
	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

	/* The actual client callout. */
	(*func)(param0, param1);

#if CONFIG_DTRACE
	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
	    (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
#endif

#if DEVELOPMENT || DEBUG
	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
1630
1631 /*
1632 * thread_call_thread:
1633 */
/*
 * Worker thread body / continuation: drains the group's pending queue,
 * invoking each callout, then either parks on the idle waitq (re-entering
 * here via thread_block_parameter) or terminates.
 */
static void
thread_call_thread(
	thread_call_group_t             group,
	wait_result_t                   wres)
{
	thread_t self = current_thread();

	/* Tag ourselves as a callout thread on first entry. */
	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
	}

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	spl_t s = disable_ints_and_lock(group);

	/* Per-invocation bookkeeping lives on this thread's stack. */
	struct thread_call_thread_state thc_state = { .thc_group = group };
	self->thc_state = &thc_state;

	thread_sched_call(self, sched_call_thread);

	while (group->pending_count > 0) {
		thread_call_t call = qe_dequeue_head(&group->pending_queue,
		    struct thread_call, tc_qlink);
		assert(call != NULL);

		group->pending_count--;
		if (group->pending_count == 0) {
			assert(queue_empty(&group->pending_queue));
		}

		/* Snapshot invocation parameters while still holding the lock. */
		thread_call_func_t func = call->tc_func;
		thread_call_param_t param0 = call->tc_param0;
		thread_call_param_t param1 = call->tc_param1;

		call->tc_queue = NULL;

		if (_is_internal_call(call)) {
			_internal_call_release(call);
		}

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		bool needs_finish = false;
		if (call->tc_flags & THREAD_CALL_ALLOC) {
			call->tc_refs++;        /* Delay free until we're done */
		}
		if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
			/*
			 * If THREAD_CALL_ONCE is used, and the timer wasn't
			 * THREAD_CALL_ALLOC, then clients swear they will use
			 * thread_call_cancel_wait() before destroying
			 * the thread call.
			 *
			 * Else, the storage for the thread call might have
			 * disappeared when thread_call_invoke() ran.
			 */
			needs_finish = true;
			call->tc_flags |= THREAD_CALL_RUNNING;
		}

		thc_state.thc_call = call;
		thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
		thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
		thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
		thc_state.thc_func = func;
		thc_state.thc_param0 = param0;
		thc_state.thc_param1 = param1;
		thc_state.thc_IOTES_invocation_timestamp = 0;

		/* Invoke the callout with the lock dropped and interrupts enabled. */
		enable_ints_and_unlock(group, s);

		thc_state.thc_call_start = mach_absolute_time();

		thread_call_invoke(func, param0, param1, call);

		thc_state.thc_call = NULL;

		/* A callout must not return with preemption disabled. */
		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
			    pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock(group);

		if (needs_finish) {
			/* Release refcount, may free, may temporarily drop lock */
			thread_call_finish(call, group, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	/* Attribute interrupt-context wakeups to this thread's ledger. */
	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle) {
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
		}
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	/* thc_state points at our stack; clear it before parking or exiting. */
	self->thc_state = NULL;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
		    ((group->active_count + group->idle_count) > group->target_thread_count)) {
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait?");
		}

		enable_ints_and_unlock(group, s);

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(group, s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	/* Surplus thread in a non-parallel group: exit instead of parking. */
	enable_ints_and_unlock(group, s);

	thread_terminate(self);
	/* NOTREACHED */
}
1798
1799 void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)1800 thread_call_start_iotes_invocation(__assert_only thread_call_t call)
1801 {
1802 thread_t self = current_thread();
1803
1804 if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
1805 /* not a thread call thread, might be a workloop IOTES */
1806 return;
1807 }
1808
1809 assert(self->thc_state);
1810 assert(self->thc_state->thc_call == call);
1811
1812 self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
1813 }
1814
1815
1816 /*
1817 * thread_call_daemon: walk list of groups, allocating
1818 * threads if appropriate (as determined by
1819 * thread_call_group_should_add_thread()).
1820 */
static void
thread_call_daemon_continue(__unused void *arg)
{
	do {
		os_atomic_store(&thread_call_daemon_awake, false, relaxed);

		/* Starting at zero happens to be high-priority first. */
		for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
			thread_call_group_t group = &thread_call_groups[i];

			spl_t s = disable_ints_and_lock(group);

			while (thread_call_group_should_add_thread(group)) {
				group->active_count++;

				/* Drop the lock: thread creation may block. */
				enable_ints_and_unlock(group, s);

				thread_call_thread_create(group);

				s = disable_ints_and_lock(group);
			}

			enable_ints_and_unlock(group, s);
		}
		/* Re-scan if another wakeup arrived while we were working. */
	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));

	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);

	/* A wakeup raced our assert_wait: cancel the wait so we loop again. */
	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}
1856
1857 static void
thread_call_daemon(__unused void * arg)1858 thread_call_daemon(
1859 __unused void *arg)
1860 {
1861 thread_t self = current_thread();
1862
1863 self->options |= TH_OPT_VMPRIV;
1864 vm_page_free_reserve(2); /* XXX */
1865
1866 thread_set_thread_name(self, "thread_call_daemon");
1867
1868 thread_call_daemon_continue(NULL);
1869 /* NOTREACHED */
1870 }
1871
1872 /*
1873 * Schedule timer to deallocate a worker thread if we have a surplus
1874 * of threads (in excess of the group's target) and at least one thread
1875 * is idle the whole time.
1876 */
static void
thread_call_start_deallocate_timer(thread_call_group_t group)
{
	__assert_only bool already_enqueued;

	/* Caller guarantees a surplus idle thread and no timer already pending. */
	assert(group->idle_count > 0);
	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);

	group->tcg_flags |= TCG_DEALLOC_ACTIVE;

	/* Fire once a thread has been idle for the full dealloc interval. */
	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;

	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);

	assert(already_enqueued == false);
}
1893
1894 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t group = (thread_call_group_t) p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	thread_call_t call;
	uint64_t now;

	thread_call_lock_spin(group);

	/* Read "now" on the clock matching this queue's flavor. */
	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

	/* Drain all expired entries from the head of the delayed queue. */
	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * if we hit a call that isn't yet ready to expire,
		 * then we're done for now
		 * TODO: The next timer in the list could have a larger leeway
		 * and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 * Have a separate rate-limited queue to avoid this
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			/* SIGNAL calls are invoked right here, in timer context. */
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				thread_call_func_t func = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				/* Drop the lock across the client callout. */
				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			/* Hand the call to a worker thread via the pending queue. */
			_pending_call_enqueue(call, group, now);
		}
	}

	/* Re-arm for the first not-yet-expired entry, if any. */
	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}
1970
/*
 * Re-evaluate one group/flavor delayed queue after a timer-coalescing
 * policy change: expire anything already due, and re-sort entries whose
 * coalescing skew exceeds the platform re-sort threshold.
 */
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	/* Only valid on the forced-evaluation path. */
	assert(ml_timer_forced_evaluation() == TRUE);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			/* Already due: move straight to the pending queue. */
			_pending_call_enqueue(call, group, now);
		} else {
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal soft-deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
			if (timer_resort_threshold(skew)) {
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}
2021
2022 void
thread_call_delayed_timer_rescan_all(void)2023 thread_call_delayed_timer_rescan_all(void)
2024 {
2025 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2026 for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
2027 thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
2028 }
2029 }
2030 }
2031
2032 /*
2033 * Timer callback to tell a thread to terminate if
2034 * we have an excess of threads and at least one has been
2035 * idle for a long time.
2036 */
static void
thread_call_dealloc_timer(
	timer_call_param_t p0,
	__unused timer_call_param_t p1)
{
	thread_call_group_t group = (thread_call_group_t)p0;
	uint64_t now;
	kern_return_t res;
	bool terminated = false;

	thread_call_lock_spin(group);

	/* This callback only fires while the dealloc timer is marked armed. */
	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);

	now = mach_absolute_time();

	if (group->idle_count > 0) {
		/* Terminate a thread only if one has sat idle for at least
		 * the full dealloc interval. */
		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
			terminated = true;
			group->idle_count--;
			/* THREAD_INTERRUPTED tells the woken idle thread to
			 * terminate itself rather than look for work. */
			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
			if (res != KERN_SUCCESS) {
				panic("Unable to wake up idle thread for termination?");
			}
		}
	}

	/* The timer has fired, so it is no longer armed; it must be
	 * re-armed below if further culling is needed. */
	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;

	/*
	 * If we still have an excess of threads, schedule another
	 * invocation of this function.
	 */
	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
		/*
		 * If we killed someone just now, push out the
		 * next deadline.
		 */
		if (terminated) {
			group->idle_timestamp = now;
		}

		thread_call_start_deallocate_timer(group);
	}

	thread_call_unlock(group);
}
2085
2086 /*
2087 * Wait for the invocation of the thread call to complete
2088 * We know there's only one in flight because of the 'once' flag.
2089 *
2090 * If a subsequent invocation comes in before we wake up, that's OK
2091 *
2092 * TODO: Here is where we will add priority inheritance to the thread executing
2093 * the thread call in case it's lower priority than the current thread
2094 * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
2095 *
 * Called with the thread call lock held; returns with it unlocked
2097 * This lets us avoid a spurious take/drop after waking up from thread_block
2098 *
2099 * This thread could be a thread call thread itself, blocking and therefore making a
2100 * sched_call upcall into the thread call subsystem, needing the group lock.
2101 * However, we're saved from deadlock because the 'block' upcall is made in
2102 * thread_block, not in assert_wait.
2103 */
/*
 * Wait for the in-flight invocation of a THREAD_CALL_ONCE call, if any.
 * Returns true iff it actually waited.  Entered with the group lock held
 * (interrupts disabled, saved in 's'); always returns with it released.
 */
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
{
	assert(call->tc_flags & THREAD_CALL_ALLOC);
	assert(call->tc_flags & THREAD_CALL_ONCE);

	thread_call_group_t group = thread_call_get_group(call);

	/* Nothing in flight: drop the lock and report that we didn't wait. */
	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
		enable_ints_and_unlock(group, s);
		return false;
	}

	/* call is running, so we have to wait for it */
	call->tc_flags |= THREAD_CALL_WAIT;

	/* Assert the wait before releasing the lock so the finishing thread
	 * cannot observe THREAD_CALL_WAIT and issue the wakeup before we are
	 * on the waitq. */
	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
	if (res != THREAD_WAITING) {
		panic("Unable to assert wait: %d", res);
	}

	enable_ints_and_unlock(group, s);

	res = thread_block(THREAD_CONTINUE_NULL);
	if (res != THREAD_AWAKENED) {
		panic("Awoken with %d?", res);
	}

	/* returns unlocked */
	return true;
}
2135
2136 /*
2137 * Wait for an in-flight invocation to complete
2138 * Does NOT try to cancel, so the client doesn't need to hold their
2139 * lock while calling this function.
2140 *
2141 * Returns whether or not it had to wait.
2142 *
2143 * Only works for THREAD_CALL_ONCE calls.
2144 */
2145 boolean_t
thread_call_wait_once(thread_call_t call)2146 thread_call_wait_once(thread_call_t call)
2147 {
2148 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
2149 panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
2150 }
2151
2152 if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
2153 panic("thread_call_wait_once: can't wait_once on a non-once call");
2154 }
2155
2156 if (!ml_get_interrupts_enabled()) {
2157 panic("unsafe thread_call_wait_once");
2158 }
2159
2160 thread_t self = current_thread();
2161
2162 if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
2163 self->thc_state && self->thc_state->thc_call == call) {
2164 panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
2165 call, call->tc_func);
2166 }
2167
2168 thread_call_group_t group = thread_call_get_group(call);
2169
2170 spl_t s = disable_ints_and_lock(group);
2171
2172 bool waited = thread_call_wait_once_locked(call, s);
2173 /* thread call lock unlocked */
2174
2175 return waited;
2176 }
2177
2178
2179 /*
2180 * Wait for all requested invocations of a thread call prior to now
2181 * to finish. Can only be invoked on thread calls whose storage we manage.
2182 * Just waits for the finish count to catch up to the submit count we find
2183 * at the beginning of our wait.
2184 *
2185 * Called with thread_call_lock held. Returns with lock released.
2186 */
static void
thread_call_wait_locked(thread_call_t call, spl_t s)
{
	thread_call_group_t group = thread_call_get_group(call);

	assert(call->tc_flags & THREAD_CALL_ALLOC);

	/* Snapshot the submit count under the lock: invocations submitted
	 * after this point are deliberately not waited for. */
	uint64_t submit_count = call->tc_submit_count;

	while (call->tc_finish_count < submit_count) {
		/* Re-set each iteration: the finishing thread clears it. */
		call->tc_flags |= THREAD_CALL_WAIT;

		/* Assert the wait while still holding the lock so the wakeup
		 * from a finishing invocation cannot be missed. */
		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
		    CAST_EVENT64_T(call), THREAD_UNINT, 0);

		if (res != THREAD_WAITING) {
			panic("Unable to assert wait: %d", res);
		}

		enable_ints_and_unlock(group, s);

		res = thread_block(THREAD_CONTINUE_NULL);
		if (res != THREAD_AWAKENED) {
			panic("Awoken with %d?", res);
		}

		/* Retake the lock to re-check the finish count. */
		s = disable_ints_and_lock(group);
	}

	enable_ints_and_unlock(group, s);
}
2218
2219 /*
2220 * Determine whether a thread call is either on a queue or
2221 * currently being executed.
2222 */
2223 boolean_t
thread_call_isactive(thread_call_t call)2224 thread_call_isactive(thread_call_t call)
2225 {
2226 thread_call_group_t group = thread_call_get_group(call);
2227
2228 spl_t s = disable_ints_and_lock(group);
2229 boolean_t active = (call->tc_submit_count > call->tc_finish_count);
2230 enable_ints_and_unlock(group, s);
2231
2232 return active;
2233 }
2234
2235 /*
2236 * adjust_cont_time_thread_calls
2237 * on wake, reenqueue delayed call timer for continuous time thread call groups
2238 */
2239 void
adjust_cont_time_thread_calls(void)2240 adjust_cont_time_thread_calls(void)
2241 {
2242 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
2243 thread_call_group_t group = &thread_call_groups[i];
2244 spl_t s = disable_ints_and_lock(group);
2245
2246 /* only the continuous timers need to be re-armed */
2247
2248 _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
2249 enable_ints_and_unlock(group, s);
2250 }
2251 }
2252