1 /*
2 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <kern/kern_types.h>
31 #include <kern/processor.h>
32 #include <kern/thread.h>
33 #include <kern/zalloc.h>
34 #include <kern/task.h>
35 #include <kern/machine.h>
36 #include <kern/coalition.h>
37 #include <sys/errno.h>
38 #include <kern/queue.h>
39 #include <kern/locks.h>
40 #include <kern/thread_group.h>
41 #include <kern/sched_clutch.h>
42 #include <kern/sched_rt.h>
43
44 #if CONFIG_THREAD_GROUPS
45
46 #define TG_MACHINE_DATA_ALIGN_SIZE (16)
47
/*
 * A thread group: the unit of scheduling/CLPC accounting shared by related
 * threads. Allocated from tg_zone; tg_machine_data_size extra bytes of
 * machine-layer (CLPC) private data are appended via the flexible array.
 */
struct thread_group {
	uint64_t tg_id;                       /* unique ID, assigned from tg_next_id under tg_lock */
	char tg_name[THREAD_GROUP_MAXNAME];   /* not guaranteed NUL-terminated; readers use bounded ops */
	struct os_refcnt tg_refcount;
	struct {
		uint32_t tg_flags;            /* protected by tg_flags_update_lock */
		cluster_type_t tg_recommendation;
	};
	/* We make the mpsc destroy chain link a separate field here because while
	 * refs = 0 and the thread group is enqueued on the daemon queue, CLPC
	 * (which does not hold an explicit ref) is still under the assumption that
	 * this thread group is alive and may provide recommendation changes/updates
	 * to it. As such, we need to make sure that all parts of the thread group
	 * structure are valid.
	 */
	struct mpsc_queue_chain tg_destroy_link;
	queue_chain_t tg_queue_chain;         /* linkage on the global tg_queue, under tg_lock */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch tg_sched_clutch;  /* per-group clutch runqs; lifetime tied to the group */
#endif /* CONFIG_SCHED_CLUTCH */
	/* Machine/CLPC-private trailing data; 16-byte aligned (see boot-time size setup). */
	uint8_t tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
} __attribute__((aligned(8)));
70
/* Zone backing all thread_group allocations; element size fixed at boot. */
static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
static uint32_t tg_count;                  /* number of live groups; protected by tg_lock */
static queue_head_t tg_queue;              /* global list of all groups; protected by tg_lock */
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
/* Protects tg_queue, tg_count and tg_next_id. */
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
/* Protects tg_flags updates and is held across CLPC flag callouts;
 * see the block comment above thread_group_flags_update_lock(). */
static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp);

static uint64_t tg_next_id = 0;            /* monotonically increasing ID source, under tg_lock */
static uint32_t tg_size;                   /* total allocation size: struct + machine data, aligned */
static uint32_t tg_machine_data_size;      /* bytes of machine-private data per group (boot-arg/EDT) */
static uint32_t perf_controller_thread_group_immediate_ipi; /* boot-arg "kern.perf_tg_no_dipi" */
/* Well-known groups created (and permanently retained) at boot. */
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_cellular;
static struct thread_group *tg_perf_controller;
/* Whether adopting a bank voucher may change a thread's group (boot-arg/EDT). */
int tg_set_by_bankvoucher;

static bool thread_group_retain_try(struct thread_group *tg);

/* Deferred-free queue for groups whose last reference is dropped from
 * contexts that cannot run thread_group_deallocate_complete() inline. */
static struct mpsc_daemon_queue thread_group_deallocate_queue;
static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq);
95
96 /*
97 * Initialize thread groups at boot
98 */
99 void
thread_group_init(void)100 thread_group_init(void)
101 {
102 // Get thread group structure extension from EDT or boot-args (which can override EDT)
103 if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
104 if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
105 tg_machine_data_size = 8;
106 }
107 }
108
109 if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
110 if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
111 perf_controller_thread_group_immediate_ipi = 0;
112 }
113 }
114
115 // Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
116 if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
117 if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
118 tg_set_by_bankvoucher = 1;
119 }
120 }
121
122 tg_size = sizeof(struct thread_group) + tg_machine_data_size;
123 if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
124 tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
125 }
126 tg_machine_data_size = tg_size - sizeof(struct thread_group);
127 // printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
128 assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
129 tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);
130
131 queue_head_init(tg_queue);
132 tg_system = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
133 thread_group_set_name(tg_system, "system");
134 tg_background = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
135 thread_group_set_name(tg_background, "background");
136 lck_mtx_lock(&tg_lock);
137 tg_next_id++; // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
138 lck_mtx_unlock(&tg_lock);
139 tg_vm = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
140 thread_group_set_name(tg_vm, "VM");
141 tg_io_storage = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
142 thread_group_set_name(tg_io_storage, "io storage");
143 tg_perf_controller = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
144 thread_group_set_name(tg_perf_controller, "perf_controller");
145 tg_cellular = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
146 thread_group_set_name(tg_cellular, "Cellular");
147
148 /*
149 * The thread group deallocation queue must be a thread call based queue
150 * because it is woken up from contexts where the thread lock is held. The
151 * only way to perform wakeups safely in those contexts is to wakeup a
152 * thread call which is guaranteed to be on a different waitq and would
153 * not hash onto the same global waitq which might be currently locked.
154 */
155 mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
156 thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL,
157 MPSC_DAEMON_INIT_NONE);
158 }
159
160 #if CONFIG_SCHED_CLUTCH
161 /*
162 * sched_clutch_for_thread
163 *
164 * The routine provides a back linkage from the thread to the
165 * sched_clutch it belongs to. This relationship is based on the
166 * thread group membership of the thread. Since that membership is
167 * changed from the thread context with the thread lock held, this
168 * linkage should be looked at only with the thread lock held or
169 * when the thread cannot be running (for eg. the thread is in the
170 * runq and being removed as part of thread_select().
171 */
172 sched_clutch_t
sched_clutch_for_thread(thread_t thread)173 sched_clutch_for_thread(thread_t thread)
174 {
175 assert(thread->thread_group != NULL);
176 return &(thread->thread_group->tg_sched_clutch);
177 }
178
179 sched_clutch_t
sched_clutch_for_thread_group(struct thread_group * thread_group)180 sched_clutch_for_thread_group(struct thread_group *thread_group)
181 {
182 return &(thread_group->tg_sched_clutch);
183 }
184
185 #endif /* CONFIG_SCHED_CLUTCH */
186
187 uint64_t
thread_group_id(struct thread_group * tg)188 thread_group_id(struct thread_group *tg)
189 {
190 return (tg == NULL) ? 0 : tg->tg_id;
191 }
192
193 #if CONFIG_PREADOPT_TG
194 static inline bool
thread_get_reevaluate_tg_hierarchy_locked(thread_t t)195 thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
196 {
197 return t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
198 }
199
/* Mark the thread so its TG hierarchy is reevaluated later (e.g. by
 * thread_resolve_and_enforce_thread_group_hierarchy_if_needed).
 * Caller holds the thread lock. */
static inline void
thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags |= TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
205
/* Clear the deferred-reevaluation mark once the hierarchy has been
 * resolved. Caller holds the thread lock. */
static inline void
thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags &= ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
211 #endif
212
213 /*
214 * Use a mutex to protect all thread group flag updates.
215 * The lock should not have heavy contention since these flag updates should
216 * be infrequent. If this lock has contention issues, it should be changed to
217 * a per thread-group lock.
218 *
219 * The lock protects the flags field in the thread_group structure. It is also
220 * held while doing callouts to CLPC to reflect these flag changes.
221 */
222
/* Acquire the global flag-update mutex (see block comment above). */
void
thread_group_flags_update_lock(void)
{
	lck_mtx_lock(&tg_flags_update_lock);
}
228
/* Release the global flag-update mutex. */
void
thread_group_flags_update_unlock(void)
{
	lck_mtx_unlock(&tg_flags_update_lock);
}
234
235 /*
236 * Inform platform code about already existing thread groups
237 * or ask it to free state for all thread groups
238 */
239 void
thread_group_resync(boolean_t create)240 thread_group_resync(boolean_t create)
241 {
242 struct thread_group *tg;
243
244 thread_group_flags_update_lock();
245 lck_mtx_lock(&tg_lock);
246 qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
247 if (create) {
248 machine_thread_group_init(tg);
249 } else {
250 machine_thread_group_deinit(tg);
251 }
252 }
253 lck_mtx_unlock(&tg_lock);
254 thread_group_flags_update_unlock();
255 }
256
257 /*
258 * Create new thread group and add new reference to it.
259 */
260 struct thread_group *
thread_group_create_and_retain(uint32_t flags)261 thread_group_create_and_retain(uint32_t flags)
262 {
263 struct thread_group *tg;
264
265 tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
266 assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);
267
268 tg->tg_flags = flags;
269
270 #if CONFIG_SCHED_CLUTCH
271 /*
272 * The clutch scheduler maintains a bunch of runqs per thread group. For
273 * each thread group it maintains a sched_clutch structure. The lifetime
274 * of that structure is tied directly to the lifetime of the thread group.
275 */
276 sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);
277
278 #endif /* CONFIG_SCHED_CLUTCH */
279
280 lck_mtx_lock(&tg_lock);
281 tg->tg_id = tg_next_id++;
282 tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
283 os_ref_init(&tg->tg_refcount, NULL);
284 tg_count++;
285 enqueue_tail(&tg_queue, &tg->tg_queue_chain);
286
287 // call machine layer init before this thread group becomes visible
288 machine_thread_group_init(tg);
289 lck_mtx_unlock(&tg_lock);
290
291 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));
292 if (flags) {
293 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS), thread_group_id(tg), thread_group_get_flags(tg), 0);
294 }
295
296 return tg;
297 }
298
299 /*
300 * Point newly created thread to its home thread group
301 */
302 void
thread_group_init_thread(thread_t t,task_t task)303 thread_group_init_thread(thread_t t, task_t task)
304 {
305 struct thread_group *tg = task_coalition_get_thread_group(task);
306 t->thread_group = tg;
307 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
308 THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
309 }
310
311 /*
312 * Set thread group name
313 */
314 void
thread_group_set_name(__unused struct thread_group * tg,__unused const char * name)315 thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
316 {
317 if (name == NULL) {
318 return;
319 }
320 if (!thread_group_retain_try(tg)) {
321 return;
322 }
323 if (name[0] != '\0') {
324 strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
325 #if defined(__LP64__)
326 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
327 tg->tg_id,
328 *(uint64_t*)(void*)&tg->tg_name[0],
329 *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
330 *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
331 );
332 #else /* defined(__LP64__) */
333 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
334 tg->tg_id,
335 *(uint32_t*)(void*)&tg->tg_name[0],
336 *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
337 *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
338 );
339 #endif /* defined(__LP64__) */
340 }
341 thread_group_release(tg);
342 }
343
/* Set flags on a thread group, taking the flag-update lock around the
 * locked variant (which also performs the CLPC callout). */
void
thread_group_set_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
351
352 /*
353 * Return true if flags are valid, false otherwise.
354 * Some flags are mutually exclusive.
355 */
356 boolean_t
thread_group_valid_flags(uint32_t flags)357 thread_group_valid_flags(uint32_t flags)
358 {
359 const uint32_t sflags = flags & ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
360 const uint32_t eflags = flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
361
362 if ((sflags & THREAD_GROUP_FLAGS_SHARED) != sflags) {
363 return false;
364 }
365
366 if ((eflags & THREAD_GROUP_FLAGS_EXCLUSIVE) != eflags) {
367 return false;
368 }
369
370 /* Only one of the exclusive flags may be set. */
371 if (((eflags - 1) & eflags) != 0) {
372 return false;
373 }
374
375 return true;
376 }
377
/* Clear flags on a thread group, taking the flag-update lock around the
 * locked variant (which also performs the CLPC callout). */
void
thread_group_clear_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
385
386 /*
387 * Set thread group flags and perform related actions.
388 * The tg_flags_update_lock should be held.
389 * Currently supported flags are listed in the
390 * THREAD_GROUP_FLAGS_EXCLUSIVE and THREAD_GROUP_FLAGS_SHARED masks.
391 */
392 void
thread_group_set_flags_locked(struct thread_group * tg,uint32_t flags)393 thread_group_set_flags_locked(struct thread_group *tg, uint32_t flags)
394 {
395 if (!thread_group_valid_flags(flags)) {
396 panic("thread_group_set_flags: Invalid flags %u", flags);
397 }
398
399 /* Disallow any exclusive flags from being set after creation, with the
400 * exception of moving from default to application */
401 if ((flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) &&
402 !((flags & THREAD_GROUP_FLAGS_APPLICATION) &&
403 (tg->tg_flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) ==
404 THREAD_GROUP_FLAGS_DEFAULT)) {
405 flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
406 }
407 if ((tg->tg_flags & flags) == flags) {
408 return;
409 }
410
411 if (tg == tg_system) {
412 /*
413 * The system TG is used for kernel and launchd. It is also used
414 * for processes which are getting spawned and do not have a home
415 * TG yet (see task_coalition_get_thread_group()). Make sure the
416 * policies for those processes do not update the flags for the
417 * system TG. The flags for this thread group should only be set
418 * at creation via thread_group_create_and_retain().
419 */
420 return;
421 }
422
423 __kdebug_only uint64_t old_flags = tg->tg_flags;
424 tg->tg_flags |= flags;
425
426 machine_thread_group_flags_update(tg, tg->tg_flags);
427 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
428 tg->tg_id, tg->tg_flags, old_flags);
429 }
430
431 /*
432 * Clear thread group flags and perform related actions
433 * The tg_flags_update_lock should be held.
434 * Currently supported flags are listed in the
435 * THREAD_GROUP_FLAGS_EXCLUSIVE and THREAD_GROUP_FLAGS_SHARED masks.
436 */
437 void
thread_group_clear_flags_locked(struct thread_group * tg,uint32_t flags)438 thread_group_clear_flags_locked(struct thread_group *tg, uint32_t flags)
439 {
440 if (!thread_group_valid_flags(flags)) {
441 panic("thread_group_clear_flags: Invalid flags %u", flags);
442 }
443
444 /* Disallow any exclusive flags from being cleared */
445 if (flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) {
446 flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
447 }
448 if ((tg->tg_flags & flags) == 0) {
449 return;
450 }
451
452 __kdebug_only uint64_t old_flags = tg->tg_flags;
453 tg->tg_flags &= ~flags;
454 machine_thread_group_flags_update(tg, tg->tg_flags);
455 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
456 tg->tg_id, tg->tg_flags, old_flags);
457 }
458
459
460
461 /*
462 * Find thread group with specified name and put new reference to it.
463 */
464 struct thread_group *
thread_group_find_by_name_and_retain(char * name)465 thread_group_find_by_name_and_retain(char *name)
466 {
467 struct thread_group *result = NULL;
468
469 if (name == NULL) {
470 return NULL;
471 }
472
473 if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
474 return thread_group_retain(tg_system);
475 } else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
476 return thread_group_retain(tg_background);
477 } else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
478 return thread_group_retain(tg_perf_controller);
479 }
480
481 struct thread_group *tg;
482 lck_mtx_lock(&tg_lock);
483 qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
484 if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
485 thread_group_retain_try(tg)) {
486 result = tg;
487 break;
488 }
489 }
490 lck_mtx_unlock(&tg_lock);
491 return result;
492 }
493
494 /*
495 * Find thread group with specified ID and add new reference to it.
496 */
497 struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)498 thread_group_find_by_id_and_retain(uint64_t id)
499 {
500 struct thread_group *tg = NULL;
501 struct thread_group *result = NULL;
502
503 switch (id) {
504 case THREAD_GROUP_SYSTEM:
505 result = tg_system;
506 thread_group_retain(tg_system);
507 break;
508 case THREAD_GROUP_BACKGROUND:
509 result = tg_background;
510 thread_group_retain(tg_background);
511 break;
512 case THREAD_GROUP_VM:
513 result = tg_vm;
514 thread_group_retain(tg_vm);
515 break;
516 case THREAD_GROUP_IO_STORAGE:
517 result = tg_io_storage;
518 thread_group_retain(tg_io_storage);
519 break;
520 case THREAD_GROUP_PERF_CONTROLLER:
521 result = tg_perf_controller;
522 thread_group_retain(tg_perf_controller);
523 break;
524 case THREAD_GROUP_CELLULAR:
525 result = tg_cellular;
526 thread_group_retain(tg_cellular);
527 break;
528 default:
529 lck_mtx_lock(&tg_lock);
530 qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
531 if (tg->tg_id == id && thread_group_retain_try(tg)) {
532 result = tg;
533 break;
534 }
535 }
536 lck_mtx_unlock(&tg_lock);
537 }
538 return result;
539 }
540
541 /*
542 * Add new reference to specified thread group
543 */
544 struct thread_group *
thread_group_retain(struct thread_group * tg)545 thread_group_retain(struct thread_group *tg)
546 {
547 os_ref_retain(&tg->tg_refcount);
548 return tg;
549 }
550
551 /*
552 * Similar to thread_group_retain, but fails for thread groups with a
553 * zero reference count. Returns true if retained successfully.
554 */
555 static bool
thread_group_retain_try(struct thread_group * tg)556 thread_group_retain_try(struct thread_group *tg)
557 {
558 return os_ref_retain_try(&tg->tg_refcount);
559 }
560
/*
 * Final teardown once the refcount has hit zero: unlink the group from the
 * global list, emit the name/free tracepoints, deinit the machine layer and
 * clutch state, and free the memory. Runs either synchronously from
 * thread_group_release() or from the deallocate daemon queue.
 */
static void
thread_group_deallocate_complete(struct thread_group *tg)
{
	lck_mtx_lock(&tg_lock);
	tg_count--;
	remqueue(&tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);
	/* These guarantees make the fixed-width tg_name reads below safe. */
	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint64_t*)(void*)&tg->tg_name[0],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
	    );
#else /* defined(__LP64__) */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint32_t*)(void*)&tg->tg_name[0],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
	    );
#endif /* defined(__LP64__) */
	machine_thread_group_deinit(tg);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
	zfree(tg_zone, tg);
}
592
593 /*
594 * Drop a reference to specified thread group
595 */
596 void
thread_group_release(struct thread_group * tg)597 thread_group_release(struct thread_group *tg)
598 {
599 if (os_ref_release(&tg->tg_refcount) == 0) {
600 thread_group_deallocate_complete(tg);
601 }
602 }
603
/* Drop a reference that is known not to be the last one (asserts if the
 * count would reach zero). */
void
thread_group_release_live(struct thread_group *tg)
{
	os_ref_release_live(&tg->tg_refcount);
}
609
/* Daemon-queue callback: completes destruction of a group that was
 * enqueued by thread_group_deallocate_safe(). */
static void
thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &thread_group_deallocate_queue);
	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);

	thread_group_deallocate_complete(tg);
}
618
/*
 * Drop a reference from a context where synchronous teardown is unsafe
 * (e.g. with the thread lock held). If this was the last reference, the
 * group is enqueued for deferred destruction on the thread-call-backed
 * deallocate queue; CLPC may still reference it until the queue runs
 * (see the comment on tg_destroy_link).
 */
void
thread_group_deallocate_safe(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
		    MPSC_QUEUE_NONE);
	}
}
627
628 /*
629 * Get thread's current thread group
630 */
631 inline struct thread_group *
thread_group_get(thread_t t)632 thread_group_get(thread_t t)
633 {
634 return t->thread_group;
635 }
636
/* Return the thread's home group — the one owned by its task's coalition.
 * No reference is taken. */
struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(get_threadtask(t));
}
642
643 /*
644 * The thread group is resolved according to a hierarchy:
645 *
646 * 1) work interval specified group (explicit API)
647 * 2) Auto-join thread group (wakeup tracking for special work intervals)
648 * 3) bank voucher carried group (implicitly set)
649 * 4) Preadopt thread group (if any)
650 * 5) coalition default thread group (ambient)
651 *
652 * Returns true if the thread's thread group needs to be changed and resolving
653 * TG is passed through in-out param. See also
654 * thread_mark_thread_group_hierarchy_resolved and
655 * thread_set_resolved_thread_group
656 *
657 * Caller should have thread lock. Interrupts are disabled. Thread doesn't have
658 * to be self
659 */
660 static bool
thread_compute_resolved_thread_group(thread_t t,struct thread_group ** resolved_tg)661 thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
662 {
663 struct thread_group *cur_tg, *tg;
664 cur_tg = t->thread_group;
665
666 tg = thread_group_get_home_group(t);
667
668 #if CONFIG_PREADOPT_TG
669 if (t->preadopt_thread_group) {
670 tg = t->preadopt_thread_group;
671 }
672 #endif
673 if (t->bank_thread_group) {
674 tg = t->bank_thread_group;
675 }
676
677 if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
678 if (t->auto_join_thread_group) {
679 tg = t->auto_join_thread_group;
680 }
681 } else {
682 if (t->work_interval_thread_group) {
683 tg = t->work_interval_thread_group;
684 }
685 }
686
687 *resolved_tg = tg;
688 return tg != cur_tg;
689 }
690
691 #if CONFIG_PREADOPT_TG
692
693 /*
694 * This function is always called after the hierarchy has been resolved. The
695 * caller holds the thread lock
696 */
697 static inline void
thread_assert_has_valid_thread_group(thread_t t)698 thread_assert_has_valid_thread_group(thread_t t)
699 {
700 __assert_only struct thread_group *home_tg = thread_group_get_home_group(t);
701
702 assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);
703
704 __assert_only struct thread_group *resolved_tg;
705 assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);
706
707 assert((t->thread_group == home_tg) ||
708 (t->thread_group == t->preadopt_thread_group) ||
709 (t->thread_group == t->bank_thread_group) ||
710 (t->thread_group == t->auto_join_thread_group) ||
711 (t->thread_group == t->work_interval_thread_group));
712 }
713 #endif
714
715 /*
716 * This function is called when the thread group hierarchy on the thread_t is
717 * resolved and t->thread_group is the result of the hierarchy resolution. Once
718 * this has happened, there is state that needs to be cleared up which is
719 * handled by this function.
720 *
721 * Prior to this call, we should have either
722 * a) Resolved the hierarchy and discovered no change needed
723 * b) Resolved the hierarchy and modified the t->thread_group
724 */
725 static void
thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)726 thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
727 {
728 #if CONFIG_PREADOPT_TG
729 /*
730 * We have just reevaluated the thread's hierarchy so we don't need to do it
731 * again later.
732 */
733 thread_clear_reevaluate_tg_hierarchy_locked(t);
734
735 /*
736 * Clear the old_preadopt_thread_group field whose sole purpose was to make
737 * sure that t->thread_group didn't have a dangling pointer.
738 */
739 thread_assert_has_valid_thread_group(t);
740
741 if (t->old_preadopt_thread_group) {
742 thread_group_deallocate_safe(t->old_preadopt_thread_group);
743 t->old_preadopt_thread_group = NULL;
744 }
745 #endif
746 }
747
748 /*
749 * Called with thread lock held, always called on self. This function simply
750 * moves the thread to the right clutch scheduler bucket and informs CLPC of the
751 * change
752 */
753 static void
thread_notify_thread_group_change_self(thread_t t,struct thread_group * __unused old_tg,struct thread_group * __unused new_tg)754 thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
755 struct thread_group * __unused new_tg)
756 {
757 assert(current_thread() == t);
758 assert(old_tg != new_tg);
759 assert(t->thread_group == new_tg);
760
761 uint64_t ctime = mach_approximate_time();
762 uint64_t arg1, arg2;
763 machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
764 machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
765 }
766
767 /*
768 * Called on any thread with thread lock. Updates the thread_group field on the
769 * thread with the resolved thread group and always make necessary clutch
770 * scheduler callouts. If the thread group is being modified on self,
771 * then also make necessary CLPC callouts.
772 */
773 static void
thread_set_resolved_thread_group(thread_t t,struct thread_group * old_tg,struct thread_group * resolved_tg,bool on_self)774 thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
775 struct thread_group *resolved_tg, bool on_self)
776 {
777 t->thread_group = resolved_tg;
778
779 /* Thread is either running already or is runnable but not on a runqueue */
780 assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
781 thread_assert_runq_null(t);
782
783 struct thread_group *home_tg = thread_group_get_home_group(t);
784 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
785 thread_group_id(old_tg), thread_group_id(resolved_tg),
786 (uintptr_t)thread_tid(t), thread_group_id(home_tg));
787
788 #if CONFIG_PREADOPT_TG
789 if (resolved_tg == t->preadopt_thread_group) {
790 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
791 thread_group_id(old_tg), thread_group_id(resolved_tg),
792 thread_tid(t), thread_group_id(home_tg));
793 }
794 #endif
795
796 #if CONFIG_SCHED_CLUTCH
797 sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
798 sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
799 if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
800 sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
801 }
802 #endif
803
804 if (on_self) {
805 assert(t == current_thread());
806 thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
807 }
808
809 thread_mark_thread_group_hierarchy_resolved(t);
810 }
811
/*
 * Caller has thread lock. Always called on self.
 *
 * Resolves the thread group hierarchy for the current thread, optionally
 * clearing any preadopted group first (clear_preadopt is __unused when
 * CONFIG_PREADOPT_TG is off). The dropped preadopt reference is released
 * only after the hierarchy is marked resolved, via deallocate_safe since
 * the thread lock is held.
 */
static void
thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
{
	assert(current_thread() == t);

#if CONFIG_PREADOPT_TG
	struct thread_group *preadopt_tg = NULL;
	if (clear_preadopt) {
		if (t->preadopt_thread_group) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);

			preadopt_tg = t->preadopt_thread_group;
			t->preadopt_thread_group = NULL;
		}
	}
#endif

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		/* on_self = true: also performs the CLPC callouts. */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
	}

	/*
	 * Regardless of whether we modified the t->thread_group above or not, the
	 * hierarchy is now resolved
	 */
	thread_mark_thread_group_hierarchy_resolved(t);

#if CONFIG_PREADOPT_TG
	if (preadopt_tg) {
		thread_group_deallocate_safe(preadopt_tg);
	}
#endif
}
851
852 /*
853 * Caller has thread lock, never called on self, always called on a thread not
854 * on a runqueue. This is called from sched_prim.c. Counter part for calling on
855 * self is thread_resolve_thread_group_hierarchy_self
856 */
857 #if CONFIG_PREADOPT_TG
858 void
thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)859 thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
860 {
861 assert(t != current_thread());
862 thread_assert_runq_null(t);
863
864 if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
865 struct thread_group *resolved_tg = NULL;
866
867 bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
868 if (needs_change) {
869 struct thread_group *old_tg = t->thread_group;
870 thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
871 }
872
873 /*
874 * Regardless of whether we modified the t->thread_group above or not,
875 * the hierarchy is now resolved
876 */
877 thread_mark_thread_group_hierarchy_resolved(t);
878 }
879 }
880 #endif
881
882 #if CONFIG_PREADOPT_TG
883 /*
884 * The thread being passed can be the current thread and it can also be another
885 * thread which is running on another core. This function is called with spin
886 * locks held (kq and wq lock) but the thread lock is not held by caller.
887 *
888 * The thread always takes a +1 on the thread group and will release the
889 * previous preadoption thread group's reference or stash it.
890 */
void
thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
{
	spl_t s = splsched();
	thread_lock(t);

	/*
	 * Assert that this is never called on WindowServer when it has already
	 * issued a block callout to CLPC.
	 *
	 * This should never happen because we don't ever call
	 * thread_set_preadopt_thread_group on a servicer after going out to
	 * userspace unless we are doing so to/after an unbind
	 */
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	struct thread_group *old_tg = t->thread_group;
	struct thread_group *home_tg = thread_group_get_home_group(t);

	/*
	 * Since the preadoption thread group can disappear from under you, we need
	 * to make sure that the thread_group pointer is always pointing to valid
	 * memory.
	 *
	 * We run the risk of the thread group pointer pointing to dangling memory
	 * when the following happens:
	 *
	 * a) We update the preadopt_thread_group
	 * b) We resolve hierarchy and need to change the resolved_thread_group
	 * c) For some reason, we are not able to do so and we need to set the
	 * resolved thread group later.
	 */

	/* take the ref from the thread */
	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;

	if (tg == NULL) {
		/* Clearing the preadoption thread group */
		t->preadopt_thread_group = NULL;
		if (old_preadopt_tg != NULL) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
		}
	} else {
		/* The thread takes its own +1 on the new preadoption thread group */
		t->preadopt_thread_group = thread_group_retain(tg);
	}

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
	if (!needs_change) {
		/*
		 * Setting preadoption thread group didn't change anything, simply mark
		 * the hierarchy as resolved and exit.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
		goto out;
	}

	if (t != current_thread()) {
		/*
		 * We're modifying the thread group of another thread, we need to take
		 * action according to the state of the other thread.
		 *
		 * Try removing the thread from its runq, modify its TG and then
		 * reinsert it for reevaluation. If the thread isn't runnable (already
		 * running, started running concurrently, or in a waiting state), then
		 * mark a bit that will cause the thread to reevaluate its own
		 * hierarchy the next time it is being inserted into a runq
		 */
		if (thread_run_queue_remove(t)) {
			/* Thread is runnable and we successfully removed it from the runq */
			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

			thread_run_queue_reinsert(t, SCHED_TAILQ);
		} else {
			/*
			 * The thread is not runnable or it is running already - let the
			 * thread reevaluate the next time it gets enqueued on a runq
			 */
			thread_set_reevaluate_tg_hierarchy_locked(t);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
		}
	} else {
		/* We're modifying thread group on ourselves */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(tg),
		    thread_tid(t), thread_group_id(home_tg));
	}

out:
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		assert(t->thread_group == old_tg);
		/*
		 * We need to reevaluate TG hierarchy later as a result of this
		 * `thread_set_preadopt_thread_group` operation. This means that the
		 * thread group on the thread was pointing to either the home thread
		 * group, the preadoption thread group we just replaced, or the old
		 * preadoption thread group stashed on the thread.
		 */
		/*
		 * NOTE(review): the third disjunct below only checks that
		 * old_preadopt_thread_group is non-NULL, rather than comparing it
		 * against t->thread_group as the comment above suggests — confirm
		 * this weaker form is intentional.
		 */
		assert(t->thread_group == home_tg ||
		    t->thread_group == old_preadopt_tg ||
		    t->old_preadopt_thread_group);

		if (t->thread_group == old_preadopt_tg) {
			/*
			 * t->thread_group is pointing to the preadopt thread group we just
			 * replaced. This means the hierarchy was resolved before this call.
			 * Assert that there was no old_preadopt_thread_group on the thread.
			 */
			assert(t->old_preadopt_thread_group == NULL);
			/*
			 * Since t->thread_group is still pointing to the old preadopt thread
			 * group - we need to keep it alive until we reevaluate the hierarchy
			 * next
			 */
			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
		} else if (old_preadopt_tg != NULL) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	} else {
		/* We resolved the hierarchy just now */
		thread_assert_has_valid_thread_group(t);

		/*
		 * We don't need the old preadopt thread group that we stashed in our
		 * local variable, drop it.
		 */
		if (old_preadopt_tg) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	}
	thread_unlock(t);
	splx(s);
	return;
}
1034
1035 #endif
1036
1037 /*
1038 * thread_set_thread_group()
1039 *
1040 * Caller must guarantee lifetime of the thread group for the life of the call -
1041 * this overrides the thread group without going through the hierarchy
1042 * resolution. This is for special thread groups like the VM and IO thread
1043 * groups only.
1044 */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	spl_t s = splsched();
	/*
	 * NOTE(review): t->thread_group is sampled at splsched() but before
	 * thread_lock(t) is taken below — presumably safe because callers invoke
	 * this on the current thread only; confirm against all call sites.
	 */
	old_tg = t->thread_group;

	if (old_tg != tg) {
		thread_lock(t);

		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
		/* Direct override; bypasses hierarchy resolution by design */
		t->thread_group = tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(tg),
		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

		/* Notify the perf-control layer of the group change */
		thread_notify_thread_group_change_self(t, old_tg, tg);

		thread_unlock(t);
	}

	splx(s);
}
1071
1072 /* Called without the thread lock held, called on current thread */
1073 void
thread_group_set_bank(thread_t t,struct thread_group * tg)1074 thread_group_set_bank(thread_t t, struct thread_group *tg)
1075 {
1076 assert(current_thread() == t);
1077 /* boot arg disables groups in bank */
1078 if (tg_set_by_bankvoucher == FALSE) {
1079 return;
1080 }
1081
1082 spl_t s = splsched();
1083 thread_lock(t);
1084
1085 /* This is a borrowed reference from the current bank voucher */
1086 t->bank_thread_group = tg;
1087
1088 assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1089 thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1090
1091 thread_unlock(t);
1092 splx(s);
1093 }
1094
1095 #if CONFIG_SCHED_AUTO_JOIN
1096 /*
1097 * thread_group_set_autojoin_thread_group_locked()
1098 *
1099 * Sets the thread group of a thread based on auto-join rules and reevaluates
1100 * the hierarchy.
1101 *
1102 * Preconditions:
1103 * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
1104 * - Thread must be locked by the caller already
1105 */
1106 void
thread_set_autojoin_thread_group_locked(thread_t t,struct thread_group * tg)1107 thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
1108 {
1109 thread_assert_runq_null(t);
1110
1111 assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1112 t->auto_join_thread_group = tg;
1113
1114 struct thread_group *resolved_tg = NULL;
1115 bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
1116
1117 if (needs_change) {
1118 struct thread_group *old_tg = t->thread_group;
1119 struct thread_group *home_tg = thread_group_get_home_group(t);
1120
1121 t->thread_group = resolved_tg;
1122
1123 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
1124 thread_group_id(old_tg), thread_group_id(resolved_tg),
1125 thread_tid(t), thread_group_id(home_tg));
1126 /*
1127 * If the thread group is being changed for the current thread, callout
1128 * to CLPC to update the thread's information at that layer. This makes
1129 * sure CLPC has consistent state when the current thread is going
1130 * off-core.
1131 *
1132 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
1133 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self)
1134 */
1135 if (t == current_thread()) {
1136 uint64_t ctime = mach_approximate_time();
1137 uint64_t arg1, arg2;
1138 machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
1139 machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
1140 }
1141 }
1142
1143 thread_mark_thread_group_hierarchy_resolved(t);
1144 }
1145 #endif
1146
1147 /* Thread is not locked. Thread is self */
1148 void
thread_set_work_interval_thread_group(thread_t t,struct thread_group * tg)1149 thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
1150 {
1151 assert(current_thread() == t);
1152 assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));
1153
1154 /*
1155 * We have a work interval, we don't need the preadoption thread group
1156 * anymore (ie, it shouldn't be available for us to jump back to it after
1157 * the thread leaves the work interval)
1158 */
1159 spl_t s = splsched();
1160 thread_lock(t);
1161
1162 t->work_interval_thread_group = tg;
1163 assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1164
1165 thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1166
1167 thread_unlock(t);
1168 splx(s);
1169 }
1170
1171 inline cluster_type_t
thread_group_recommendation(struct thread_group * tg)1172 thread_group_recommendation(struct thread_group *tg)
1173 {
1174 if (tg == NULL) {
1175 return CLUSTER_TYPE_SMP;
1176 } else {
1177 return tg->tg_recommendation;
1178 }
1179 }
1180
/* Returns the unique 64-bit id of the thread group; tg must be non-NULL. */
inline uint64_t
thread_group_get_id(struct thread_group *tg)
{
	return tg->tg_id;
}
1186
/* Returns the current number of thread groups (global tg_count). */
uint32_t
thread_group_count(void)
{
	return tg_count;
}
1192
1193 /*
1194 * Can only be called while tg cannot be destroyed
1195 */
1196 inline const char*
thread_group_get_name(struct thread_group * tg)1197 thread_group_get_name(struct thread_group *tg)
1198 {
1199 return tg->tg_name;
1200 }
1201
/* Returns a pointer to the CLPC-owned machine data embedded in the group. */
inline void *
thread_group_get_machine_data(struct thread_group *tg)
{
	return &tg->tg_machine_data;
}
1207
/* Size in bytes of the per-group machine data region (global). */
inline uint32_t
thread_group_machine_data_size(void)
{
	return tg_machine_data_size;
}
1213
/*
 * True only for the perf-controller thread group, and only when the
 * perf_controller_thread_group_immediate_ipi tunable is enabled.
 */
inline boolean_t
thread_group_uses_immediate_ipi(struct thread_group *tg)
{
	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
}
1219
/*
 * Invoke callout(arg, index, tg) for every thread group on tg_queue.
 * Each element is validated with ml_validate_nofault before use; iteration
 * aborts with KERN_FAILURE if an element fails validation (stackshot may run
 * in contexts where faulting is not allowed).
 */
kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
{
	struct thread_group *tg;
	int i = 0;
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
			return KERN_FAILURE;
		}
		callout(arg, i, tg);
		i++;
	}
	return KERN_SUCCESS;
}
1234
1235 void
thread_group_join_io_storage(void)1236 thread_group_join_io_storage(void)
1237 {
1238 struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
1239 assert(tg != NULL);
1240 thread_set_thread_group(current_thread(), tg);
1241 }
1242
1243 void
thread_group_join_cellular(void)1244 thread_group_join_cellular(void)
1245 {
1246 struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_CELLULAR);
1247 assert(tg != NULL);
1248 assert(current_thread()->thread_group != tg);
1249 thread_set_thread_group(current_thread(), tg);
1250 }
1251
1252 void
thread_group_join_perf_controller(void)1253 thread_group_join_perf_controller(void)
1254 {
1255 struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
1256 assert(tg != NULL);
1257 thread_set_thread_group(current_thread(), tg);
1258 }
1259
1260 void
thread_group_vm_add(void)1261 thread_group_vm_add(void)
1262 {
1263 assert(tg_vm != NULL);
1264 thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
1265 }
1266
/* Returns the group's flag bits; tg must be non-NULL and kept alive by caller. */
uint32_t
thread_group_get_flags(struct thread_group *tg)
{
	return tg->tg_flags;
}
1272
/* Publish a new cluster recommendation for the thread group. */
void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
1283
1284 #if CONFIG_SCHED_EDGE
1285
/* Edge scheduler: whole-group recommendations are unsupported; always panics. */
OS_NORETURN
void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	panic("sched_perfcontrol_thread_group_recommend() not supported on the Edge scheduler");
	/* Use sched_perfcontrol_thread_group_preferred_clusters_set() instead */
}
1293
/*
 * Map a scheduler bucket to the corresponding CLPC perfcontrol class.
 * Panics on any bucket value outside the known set.
 */
static perfcontrol_class_t
sched_bucket_to_perfcontrol_class(sched_bucket_t bucket)
{
	switch (bucket) {
	case TH_BUCKET_FIXPRI:
		return PERFCONTROL_CLASS_ABOVEUI;
	case TH_BUCKET_SHARE_FG:
		return PERFCONTROL_CLASS_UI;
	case TH_BUCKET_SHARE_IN:
		return PERFCONTROL_CLASS_USER_INITIATED;
	case TH_BUCKET_SHARE_DF:
		return PERFCONTROL_CLASS_NONUI;
	case TH_BUCKET_SHARE_UT:
		return PERFCONTROL_CLASS_UTILITY;
	case TH_BUCKET_SHARE_BG:
		return PERFCONTROL_CLASS_BACKGROUND;
	default:
		panic("Unexpected sched bucket %d", bucket);
	}
}
1314
#define MAX_EDGE_MATRIX_SIZE (MAX_PSETS * MAX_PSETS * TH_BUCKET_SCHED_MAX)

/*
 * Iterate through indices of the edge matrix (dimension: num_psets X num_psets X TH_BUCKET_SCHED_MAX),
 * and along the way, compute the corresponding index in CLPC's version of the matrix, which has
 * dimension: num_psets X num_psets X PERFCONTROL_CLASS_MAX
 *
 * edge_ind and sched_ind are the names bound to the two computed indices and
 * made visible to the body passed via __VA_ARGS__; src_id, dst_id and bucket
 * are evidently bound by the underlying sched_edge_matrix_iterate macro.
 */
#define sched_perfcontrol_sched_edge_matrix_iterate(num_psets, edge_ind, sched_ind, ...) \
	assert3u((num_psets), ==, sched_num_psets); \
	sched_edge_matrix_iterate(src_id, dst_id, bucket, { \
	        perfcontrol_class_t pc = sched_bucket_to_perfcontrol_class(bucket); \
	        int edge_ind = (src_id * (int)sched_num_psets * PERFCONTROL_CLASS_MAX) + (dst_id * PERFCONTROL_CLASS_MAX) + pc; \
	        int sched_ind = (src_id * (int)sched_num_psets * TH_BUCKET_SCHED_MAX) + (dst_id * TH_BUCKET_SCHED_MAX) + bucket; \
	        __VA_ARGS__; \
	})
1330
1331 /* Compute the index of a realtime edge within the perfcontrol matrix. */
1332 static uint64_t
rt_config_edge_index(uint64_t src_pset_id,uint64_t dst_pset_id,uint64_t num_psets)1333 rt_config_edge_index(uint64_t src_pset_id, uint64_t dst_pset_id, uint64_t num_psets)
1334 {
1335 return (src_pset_id * num_psets * PERFCONTROL_CLASS_MAX)
1336 + (dst_pset_id * PERFCONTROL_CLASS_MAX)
1337 + PERFCONTROL_CLASS_REALTIME;
1338 }
1339
1340 void
sched_perfcontrol_edge_matrix_by_qos_get(sched_clutch_edge * edge_matrix,bool * edge_requested,uint64_t flags,uint64_t num_psets,__assert_only uint64_t num_classes)1341 sched_perfcontrol_edge_matrix_by_qos_get(sched_clutch_edge *edge_matrix, bool *edge_requested, uint64_t flags,
1342 uint64_t num_psets, __assert_only uint64_t num_classes)
1343 {
1344 assert3u(num_psets, <=, MAX_PSETS);
1345 assert3u(num_classes, ==, PERFCONTROL_CLASS_MAX);
1346 bool sched_edge_requested[MAX_EDGE_MATRIX_SIZE] = {0};
1347 sched_perfcontrol_sched_edge_matrix_iterate(num_psets, edge_matrix_ind, sched_matrix_ind, {
1348 if (edge_requested[edge_matrix_ind]) {
1349 sched_edge_requested[sched_matrix_ind] = true;
1350 }
1351 });
1352
1353 sched_clutch_edge sched_matrix[MAX_EDGE_MATRIX_SIZE] = {0};
1354 sched_edge_matrix_get(sched_matrix, sched_edge_requested, flags, num_psets);
1355
1356 sched_perfcontrol_sched_edge_matrix_iterate(num_psets, edge_matrix_ind, sched_matrix_ind, {
1357 if (sched_edge_requested[sched_matrix_ind]) {
1358 edge_matrix[edge_matrix_ind] = sched_matrix[sched_matrix_ind];
1359 }
1360 });
1361
1362 bool sched_rt_requested[MAX_PSETS * MAX_PSETS] = {};
1363 for (uint src = 0; src < num_psets; src++) {
1364 for (uint dst = 0; dst < num_psets; dst++) {
1365 const uint64_t edge_matrix_index = rt_config_edge_index(src, dst, num_psets);
1366 if (sched_rt_requested[edge_matrix_index]) {
1367 sched_rt_requested[src * num_psets + dst] = true;
1368 }
1369 }
1370 }
1371
1372 sched_clutch_edge sched_rt_matrix[MAX_PSETS * MAX_PSETS] = {};
1373 sched_rt_matrix_get(sched_rt_matrix, sched_rt_requested, num_psets);
1374
1375 uint64_t rt_matrix_index = 0;
1376 for (uint src = 0; src < num_psets; src++) {
1377 for (uint dst = 0; dst < num_psets; dst++) {
1378 const uint64_t edge_matrix_index = rt_config_edge_index(src, dst, num_psets);
1379 if (edge_requested[edge_matrix_index]) {
1380 edge_matrix[edge_matrix_index] = sched_rt_matrix[rt_matrix_index];
1381 }
1382 rt_matrix_index++;
1383 }
1384 }
1385 }
1386
/*
 * Apply CLPC's per-QoS edge-matrix updates to the scheduler. Non-realtime
 * classes map onto the scheduler's per-bucket matrix; realtime edges are
 * routed to the separate num_psets x num_psets realtime matrix.
 */
void
sched_perfcontrol_edge_matrix_by_qos_set(sched_clutch_edge *edge_matrix, bool *edge_changed, uint64_t flags,
    uint64_t num_psets, __assert_only uint64_t num_classes)
{
	assert3u(num_psets, <=, MAX_PSETS);
	assert3u(num_classes, ==, PERFCONTROL_CLASS_MAX);
	sched_clutch_edge sched_matrix[MAX_EDGE_MATRIX_SIZE] = {0};
	bool sched_edge_changed[MAX_EDGE_MATRIX_SIZE] = {0};
	/* Convert CLPC's per-class updates into per-bucket updates */
	sched_perfcontrol_sched_edge_matrix_iterate(num_psets, edge_matrix_ind, sched_matrix_ind, {
		if (edge_changed[edge_matrix_ind]) {
		        sched_matrix[sched_matrix_ind] = edge_matrix[edge_matrix_ind];
		        sched_edge_changed[sched_matrix_ind] = true;
		}
	});

	sched_edge_matrix_set(sched_matrix, sched_edge_changed, flags, num_psets);

	/* Route PERFCONTROL_CLASS_REALTIME updates to the realtime matrix */
	sched_clutch_edge sched_rt_matrix[MAX_PSETS * MAX_PSETS] = {};
	bool sched_rt_changed[MAX_PSETS * MAX_PSETS] = {};
	for (uint src = 0; src < num_psets; src++) {
		for (uint dst = 0; dst < num_psets; dst++) {
			const uint64_t edge_matrix_ind = rt_config_edge_index(src, dst, num_psets);
			const uint64_t sched_matrix_ind = src * num_psets + dst;
			if (edge_changed[edge_matrix_ind]) {
				sched_rt_matrix[sched_matrix_ind] = edge_matrix[edge_matrix_ind];
				sched_rt_changed[sched_matrix_ind] = true;
			}
		}
	}
	sched_rt_matrix_set(sched_rt_matrix, sched_rt_changed, num_psets);
}
1418
1419 void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge * edge_matrix,bool * edge_requested,uint64_t flags,uint64_t matrix_order)1420 sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_requested, uint64_t flags,
1421 uint64_t matrix_order)
1422 {
1423 assert3u(matrix_order, <=, MAX_PSETS);
1424 bool edge_requested_per_qos[MAX_EDGE_MATRIX_SIZE] = {0};
1425 for (uint32_t i = 0; i < matrix_order * matrix_order; i++) {
1426 uint32_t expanded_index = (i * TH_BUCKET_SCHED_MAX) + TH_BUCKET_FIXPRI;
1427 edge_requested_per_qos[expanded_index] = edge_requested[i];
1428 }
1429
1430 sched_clutch_edge expanded_matrix[MAX_EDGE_MATRIX_SIZE] = {0};
1431 sched_edge_matrix_get(expanded_matrix, edge_requested_per_qos, flags, matrix_order);
1432
1433 for (uint32_t i = 0; i < matrix_order * matrix_order; i++) {
1434 if (edge_requested[i]) {
1435 uint32_t expanded_index = (i * TH_BUCKET_SCHED_MAX) + TH_BUCKET_FIXPRI;
1436 edge_matrix[i] = expanded_matrix[expanded_index];
1437 }
1438 }
1439 }
1440
1441 void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge * edge_matrix,bool * edge_changed,uint64_t flags,uint64_t matrix_order)1442 sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changed, uint64_t flags,
1443 uint64_t matrix_order)
1444 {
1445 assert3u(matrix_order, <=, MAX_PSETS);
1446 bool edge_changed_per_qos[MAX_EDGE_MATRIX_SIZE] = {0};
1447 sched_clutch_edge expanded_matrix[MAX_EDGE_MATRIX_SIZE] = {0};
1448 for (uint32_t i = 0; i < matrix_order * matrix_order; i++) {
1449 for (uint32_t bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
1450 uint32_t expanded_index = (i * TH_BUCKET_SCHED_MAX) + bucket;
1451 edge_changed_per_qos[expanded_index] = edge_changed[i];
1452 expanded_matrix[expanded_index] = edge_matrix[i];
1453 }
1454 }
1455
1456 sched_edge_matrix_set(expanded_matrix, edge_changed_per_qos, flags, matrix_order);
1457 }
1458
1459 /*
1460 * Note this may be called in both preemption enabled context as well as in the
1461 * context of the scheduler csw callout / quantum interrupt / timer interrupt
1462 * perfcontrol callouts.
1463 */
1464 void
sched_perfcontrol_thread_group_preferred_clusters_set(void * machine_data,uint32_t tg_preferred_cluster,uint32_t overrides[PERFCONTROL_CLASS_MAX],sched_perfcontrol_preferred_cluster_options_t options)1465 sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
1466 uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
1467 {
1468 struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1469 uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX];
1470 for (sched_bucket_t bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
1471 perfcontrol_class_t pc = sched_bucket_to_perfcontrol_class(bucket);
1472 tg_bucket_preferred_cluster[bucket] = (overrides[pc] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[pc] : tg_preferred_cluster;
1473 }
1474 sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1475 }
1476
/*
 * Install CLPC's per-cluster CPU rotation bitmasks. The preferred and
 * migration sets must be disjoint (asserted below).
 */
void
sched_perfcontrol_edge_cpu_rotation_bitmasks_set(uint32_t cluster_id, uint64_t preferred_bitmask, uint64_t migration_bitmask)
{
	assert(cluster_id < MAX_PSETS);
	assert((preferred_bitmask & migration_bitmask) == 0);
	processor_set_t pset = pset_array[cluster_id];
	pset->perfcontrol_cpu_preferred_bitmask = preferred_bitmask;
	pset->perfcontrol_cpu_migration_bitmask = migration_bitmask;
}
1486
/* Read back the per-cluster CPU rotation bitmasks previously installed by CLPC. */
void
sched_perfcontrol_edge_cpu_rotation_bitmasks_get(uint32_t cluster_id, uint64_t *preferred_bitmask, uint64_t *migration_bitmask)
{
	assert(cluster_id < MAX_PSETS);
	processor_set_t pset = pset_array[cluster_id];
	*preferred_bitmask = pset->perfcontrol_cpu_preferred_bitmask;
	*migration_bitmask = pset->perfcontrol_cpu_migration_bitmask;
}
1495
1496 #else /* CONFIG_SCHED_EDGE */
1497
1498 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1499 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1500 {
1501 struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1502 SCHED(thread_group_recommendation_change)(tg, new_recommendation);
1503 }
1504
/*
 * CONFIG_SCHED_EDGE is disabled: the Edge-scheduler tuning interfaces below
 * are still exported for CLPC but are intentionally no-ops on this
 * configuration.
 */
void
sched_perfcontrol_edge_matrix_by_qos_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_requested, __unused uint64_t flags,
    __unused uint64_t num_psets, __unused uint64_t num_classes)
{
}

void
sched_perfcontrol_edge_matrix_by_qos_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changed, __unused uint64_t flags,
    __unused uint64_t num_psets, __unused uint64_t num_classes)
{
}

void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_set(__unused uint32_t cluster_id, __unused uint64_t preferred_bitmask, __unused uint64_t migration_bitmask)
{
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_get(__unused uint32_t cluster_id, __unused uint64_t *preferred_bitmask, __unused uint64_t *migration_bitmask)
{
}
1542
1543 #endif /* CONFIG_SCHED_EDGE */
1544
1545 /*
1546 * Can only be called while tg cannot be destroyed.
1547 * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
1548 */
1549 const char*
sched_perfcontrol_thread_group_get_name(void * machine_data)1550 sched_perfcontrol_thread_group_get_name(void *machine_data)
1551 {
1552 struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
1553 return thread_group_get_name(tg);
1554 }
1555
1556 #endif /* CONFIG_THREAD_GROUPS */
1557