xref: /xnu-8020.140.41/osfmk/kern/thread_group.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <kern/kern_types.h>
31 #include <kern/processor.h>
32 #include <kern/thread.h>
33 #include <kern/zalloc.h>
34 #include <kern/task.h>
35 #include <kern/machine.h>
36 #include <kern/coalition.h>
37 #include <sys/errno.h>
38 #include <kern/queue.h>
39 #include <kern/locks.h>
40 #include <kern/thread_group.h>
41 #include <kern/sched_clutch.h>
42 
43 #if CONFIG_THREAD_GROUPS
44 
/* Required alignment of the machine-specific (CLPC) data trailing the struct. */
#define TG_MACHINE_DATA_ALIGN_SIZE (16)

struct thread_group {
	uint64_t                tg_id;          /* unique, monotonically assigned under tg_lock */
	char                    tg_name[THREAD_GROUP_MAXNAME];  /* set once; may not be NUL-terminated at max length */
	struct os_refcnt        tg_refcount;    /* lifetime refcount; 0 triggers deallocation */
	struct {
		uint32_t                tg_flags;           /* THREAD_GROUP_FLAGS_*; protected by tg_flags_update_lock */
		cluster_type_t          tg_recommendation;  /* CLPC cluster recommendation */
	};
	/* We make the mpsc destroy chain link a separate field here because while
	 * refs = 0 and the thread group is enqueued on the daemon queue, CLPC
	 * (which does not hold an explicit ref) is still under the assumption that
	 * this thread group is alive and may provide recommendation changes/updates
	 * to it. As such, we need to make sure that all parts of the thread group
	 * structure are valid.
	 */
	struct mpsc_queue_chain tg_destroy_link;
	queue_chain_t           tg_queue_chain; /* linkage on the global tg_queue; protected by tg_lock */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch     tg_sched_clutch; /* per-group clutch scheduler state; lifetime tied to the group */
#endif /* CONFIG_SCHED_CLUTCH */
	/* Opaque per-group storage for the machine/CLPC layer; size fixed at boot. */
	uint8_t                 tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
} __attribute__((aligned(8)));
69 
static SECURITY_READ_ONLY_LATE(zone_t) tg_zone; /* allocation zone; element size fixed in thread_group_init() */
static uint32_t tg_count;                       /* number of live thread groups; protected by tg_lock */
static queue_head_t tg_queue;                   /* global list of all thread groups; protected by tg_lock */
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);              /* protects tg_queue, tg_count, tg_next_id */
static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp); /* protects tg_flags and CLPC flag callouts */

static uint64_t tg_next_id = 0;          /* next tg_id to hand out; protected by tg_lock */
static uint32_t tg_size;                 /* total allocation size: struct + machine data, rounded up */
static uint32_t tg_machine_data_size;    /* bytes of trailing machine data (boot-arg/EDT tunable) */
static uint32_t perf_controller_thread_group_immediate_ipi; /* boot-arg kern.perf_tg_no_dipi */
/* Built-in groups created at boot and never destroyed. */
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
int tg_set_by_bankvoucher;               /* whether voucher adoption may set a thread's group */

static bool thread_group_retain_try(struct thread_group *tg);

/* Deferred-free queue; see thread_group_deallocate_safe(). */
static struct mpsc_daemon_queue thread_group_deallocate_queue;
static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq);
93 
94 /*
95  * Initialize thread groups at boot
96  */
void
thread_group_init(void)
{
	// Get thread group structure extension from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
			tg_machine_data_size = 8;
		}
	}

	if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
		if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
			perf_controller_thread_group_immediate_ipi = 0;
		}
	}

	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
			tg_set_by_bankvoucher = 1;
		}
	}

	/*
	 * Round the total element size up to TG_MACHINE_DATA_ALIGN_SIZE so the
	 * trailing tg_machine_data keeps its alignment, then recompute the
	 * machine-data size from the rounded total.
	 */
	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
	if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
		tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
	}
	tg_machine_data_size = tg_size - sizeof(struct thread_group);
	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
	assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
	tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);

	/* Create the built-in groups; IDs are assigned in creation order. */
	queue_head_init(tg_queue);
	tg_system = thread_group_create_and_retain(FALSE);
	thread_group_set_name(tg_system, "system");
	tg_background = thread_group_create_and_retain(FALSE);
	thread_group_set_name(tg_background, "background");
	lck_mtx_lock(&tg_lock);
	tg_next_id++;  // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
	lck_mtx_unlock(&tg_lock);
	tg_vm = thread_group_create_and_retain(FALSE);
	thread_group_set_name(tg_vm, "VM");
	tg_io_storage = thread_group_create_and_retain(FALSE);
	thread_group_set_name(tg_io_storage, "io storage");
	tg_perf_controller = thread_group_create_and_retain(FALSE);
	thread_group_set_name(tg_perf_controller, "perf_controller");

	/*
	 * If CLPC is disabled, it would recommend SMP for all thread groups.
	 * In that mode, the scheduler would like to restrict the kernel thread
	 * groups to the E-cluster while all other thread groups are run on the
	 * P-cluster. To identify the kernel thread groups, mark them with a
	 * special flag THREAD_GROUP_FLAGS_SMP_RESTRICT which is looked at by
	 * recommended_pset_type().
	 */
	tg_system->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_vm->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_io_storage->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_perf_controller->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;

	/*
	 * The thread group deallocation queue must be a thread call based queue
	 * because it is woken up from contexts where the thread lock is held. The
	 * only way to perform wakeups safely in those contexts is to wakeup a
	 * thread call which is guaranteed to be on a different waitq and would
	 * not hash onto the same global waitq which might be currently locked.
	 */
	mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
	    thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL,
	    MPSC_DAEMON_INIT_NONE);
}
168 
169 #if CONFIG_SCHED_CLUTCH
170 /*
171  * sched_clutch_for_thread
172  *
173  * The routine provides a back linkage from the thread to the
174  * sched_clutch it belongs to. This relationship is based on the
175  * thread group membership of the thread. Since that membership is
176  * changed from the thread context with the thread lock held, this
177  * linkage should be looked at only with the thread lock held or
178  * when the thread cannot be running (for eg. the thread is in the
179  * runq and being removed as part of thread_select().
180  */
181 sched_clutch_t
sched_clutch_for_thread(thread_t thread)182 sched_clutch_for_thread(thread_t thread)
183 {
184 	assert(thread->thread_group != NULL);
185 	return &(thread->thread_group->tg_sched_clutch);
186 }
187 
/* Return the sched_clutch embedded in the given thread group. */
sched_clutch_t
sched_clutch_for_thread_group(struct thread_group *thread_group)
{
	return &(thread_group->tg_sched_clutch);
}
193 
194 /*
195  * Translate the TG flags to a priority boost for the sched_clutch.
196  * This priority boost will apply to the entire clutch represented
197  * by the thread group.
198  */
199 static void
sched_clutch_update_tg_flags(sched_clutch_t clutch,uint8_t flags)200 sched_clutch_update_tg_flags(sched_clutch_t clutch, uint8_t flags)
201 {
202 	sched_clutch_tg_priority_t sc_tg_pri = 0;
203 	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
204 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
205 	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
206 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
207 	} else {
208 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
209 	}
210 	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
211 }
212 
213 #endif /* CONFIG_SCHED_CLUTCH */
214 
215 uint64_t
thread_group_id(struct thread_group * tg)216 thread_group_id(struct thread_group *tg)
217 {
218 	return (tg == NULL) ? 0 : tg->tg_id;
219 }
220 
221 #if CONFIG_PREADOPT_TG
222 static inline bool
thread_get_reevaluate_tg_hierarchy_locked(thread_t t)223 thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
224 {
225 	return t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
226 }
227 
/* Caller holds the thread lock: mark the TG hierarchy for later reevaluation. */
static inline void
thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags |= TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
233 
/* Caller holds the thread lock: clear the deferred-reevaluation flag. */
static inline void
thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags &= ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
239 #endif
240 
241 /*
242  * Use a spinlock to protect all thread group flag updates.
243  * The lock should not have heavy contention since these flag updates should
244  * be infrequent. If this lock has contention issues, it should be changed to
245  * a per thread-group lock.
246  *
247  * The lock protects the flags field in the thread_group structure. It is also
248  * held while doing callouts to CLPC to reflect these flag changes.
249  */
250 
/* Acquire the global flag-update mutex (see block comment above). */
void
thread_group_flags_update_lock(void)
{
	lck_mtx_lock(&tg_flags_update_lock);
}
256 
/* Release the global flag-update mutex. */
void
thread_group_flags_update_unlock(void)
{
	lck_mtx_unlock(&tg_flags_update_lock);
}
262 
263 /*
264  * Inform platform code about already existing thread groups
265  * or ask it to free state for all thread groups
266  */
267 void
thread_group_resync(boolean_t create)268 thread_group_resync(boolean_t create)
269 {
270 	struct thread_group *tg;
271 
272 	thread_group_flags_update_lock();
273 	lck_mtx_lock(&tg_lock);
274 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
275 		if (create) {
276 			machine_thread_group_init(tg);
277 		} else {
278 			machine_thread_group_deinit(tg);
279 		}
280 	}
281 	lck_mtx_unlock(&tg_lock);
282 	thread_group_flags_update_unlock();
283 }
284 
285 /*
286  * Create new thread group and add new reference to it.
287  */
struct thread_group *
thread_group_create_and_retain(boolean_t efficient)
{
	struct thread_group *tg;

	/* Z_ZERO guarantees tg_name and tg_machine_data start zeroed. */
	tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);

	if (efficient) {
		tg->tg_flags |= THREAD_GROUP_FLAGS_EFFICIENT;
	}

#if CONFIG_SCHED_CLUTCH
	/*
	 * The clutch scheduler maintains a bunch of runqs per thread group. For
	 * each thread group it maintains a sched_clutch structure. The lifetime
	 * of that structure is tied directly to the lifetime of the thread group.
	 */
	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);

	/*
	 * Since the thread group flags are used to determine any priority promotions
	 * for the threads in the thread group, initialize them now.
	 */
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);

#endif /* CONFIG_SCHED_CLUTCH */

	/* ID assignment, refcount init and list insertion all happen under tg_lock. */
	lck_mtx_lock(&tg_lock);
	tg->tg_id = tg_next_id++;
	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
	os_ref_init(&tg->tg_refcount, NULL);
	tg_count++;
	enqueue_tail(&tg_queue, &tg->tg_queue_chain);

	// call machine layer init before this thread group becomes visible
	machine_thread_group_init(tg);
	lck_mtx_unlock(&tg_lock);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));

	return tg;
}
331 
332 /*
333  * Point newly created thread to its home thread group
334  */
335 void
thread_group_init_thread(thread_t t,task_t task)336 thread_group_init_thread(thread_t t, task_t task)
337 {
338 	struct thread_group *tg = task_coalition_get_thread_group(task);
339 	t->thread_group = tg;
340 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
341 	    THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
342 }
343 
344 /*
345  * Set thread group name
346  */
347 void
thread_group_set_name(__unused struct thread_group * tg,__unused const char * name)348 thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
349 {
350 	if (name == NULL) {
351 		return;
352 	}
353 	if (!thread_group_retain_try(tg)) {
354 		return;
355 	}
356 	if (tg->tg_name[0] == '\0') {
357 		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
358 #if defined(__LP64__)
359 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
360 		    tg->tg_id,
361 		    *(uint64_t*)(void*)&tg->tg_name[0],
362 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
363 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
364 		    );
365 #else /* defined(__LP64__) */
366 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
367 		    tg->tg_id,
368 		    *(uint32_t*)(void*)&tg->tg_name[0],
369 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
370 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
371 		    );
372 #endif /* defined(__LP64__) */
373 	}
374 	thread_group_release(tg);
375 }
376 
/* Set flags on a thread group, taking the flag-update lock around the locked variant. */
void
thread_group_set_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
384 
/* Clear flags on a thread group, taking the flag-update lock around the locked variant. */
void
thread_group_clear_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
392 
393 /*
394  * Set thread group flags and perform related actions.
395  * The tg_flags_update_lock should be held.
396  * Currently supported flags are:
397  * - THREAD_GROUP_FLAGS_EFFICIENT
398  * - THREAD_GROUP_FLAGS_UI_APP
399  */
400 
void
thread_group_set_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_set_flags: Invalid flags %llu", flags);
	}

	/* Nothing to do if every requested flag is already set. */
	if ((tg->tg_flags & flags) == flags) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags |= flags;
	/*
	 * NOTE(review): the set path notifies the machine layer before the
	 * clutch update, while the clear path does the reverse — confirm
	 * whether this ordering asymmetry is intentional.
	 */
	machine_thread_group_flags_update(tg, tg->tg_flags);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}
421 
422 /*
423  * Clear thread group flags and perform related actions
424  * The tg_flags_update_lock should be held.
425  * Currently supported flags are:
426  * - THREAD_GROUP_FLAGS_EFFICIENT
427  * - THREAD_GROUP_FLAGS_UI_APP
428  */
429 
void
thread_group_clear_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_clear_flags: Invalid flags %llu", flags);
	}

	/* Nothing to do if none of the requested flags are set. */
	if ((tg->tg_flags & flags) == 0) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags &= ~flags;
#if CONFIG_SCHED_CLUTCH
	/* Recompute the clutch priority band from the reduced flag set. */
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}
450 
451 
452 
453 /*
454  * Find thread group with specified name and put new reference to it.
455  */
456 struct thread_group *
thread_group_find_by_name_and_retain(char * name)457 thread_group_find_by_name_and_retain(char *name)
458 {
459 	struct thread_group *result = NULL;
460 
461 	if (name == NULL) {
462 		return NULL;
463 	}
464 
465 	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
466 		return thread_group_retain(tg_system);
467 	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
468 		return thread_group_retain(tg_background);
469 	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
470 		return thread_group_retain(tg_perf_controller);
471 	}
472 
473 	struct thread_group *tg;
474 	lck_mtx_lock(&tg_lock);
475 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
476 		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
477 		    thread_group_retain_try(tg)) {
478 			result = tg;
479 			break;
480 		}
481 	}
482 	lck_mtx_unlock(&tg_lock);
483 	return result;
484 }
485 
486 /*
487  * Find thread group with specified ID and add new reference to it.
488  */
489 struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)490 thread_group_find_by_id_and_retain(uint64_t id)
491 {
492 	struct thread_group *tg = NULL;
493 	struct thread_group *result = NULL;
494 
495 	switch (id) {
496 	case THREAD_GROUP_SYSTEM:
497 		result = tg_system;
498 		thread_group_retain(tg_system);
499 		break;
500 	case THREAD_GROUP_BACKGROUND:
501 		result = tg_background;
502 		thread_group_retain(tg_background);
503 		break;
504 	case THREAD_GROUP_VM:
505 		result = tg_vm;
506 		thread_group_retain(tg_vm);
507 		break;
508 	case THREAD_GROUP_IO_STORAGE:
509 		result = tg_io_storage;
510 		thread_group_retain(tg_io_storage);
511 		break;
512 	case THREAD_GROUP_PERF_CONTROLLER:
513 		result = tg_perf_controller;
514 		thread_group_retain(tg_perf_controller);
515 		break;
516 	default:
517 		lck_mtx_lock(&tg_lock);
518 		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
519 			if (tg->tg_id == id && thread_group_retain_try(tg)) {
520 				result = tg;
521 				break;
522 			}
523 		}
524 		lck_mtx_unlock(&tg_lock);
525 	}
526 	return result;
527 }
528 
529 /*
530  * Add new reference to specified thread group
531  */
/* Take a +1 reference; returns the group for call-chaining convenience. */
struct thread_group *
thread_group_retain(struct thread_group *tg)
{
	os_ref_retain(&tg->tg_refcount);
	return tg;
}
538 
539 /*
540  * Similar to thread_group_retain, but fails for thread groups with a
541  * zero reference count. Returns true if retained successfully.
542  */
static bool
thread_group_retain_try(struct thread_group *tg)
{
	/* os_ref_retain_try fails once the count has reached zero. */
	return os_ref_retain_try(&tg->tg_refcount);
}
548 
/*
 * Final teardown once the refcount has hit zero: unlink from the global
 * list, emit the name/free tracepoints, deinit machine and clutch state,
 * then free the allocation.
 */
static void
thread_group_deallocate_complete(struct thread_group *tg)
{
	/* Remove from the global list so new lookups can no longer find it. */
	lck_mtx_lock(&tg_lock);
	tg_count--;
	remqueue(&tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);
	/* The tracepoints below read 3 raw words of tg_name; prove that's safe. */
	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint64_t*)(void*)&tg->tg_name[0],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
	    );
#else /* defined(__LP64__) */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint32_t*)(void*)&tg->tg_name[0],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
	    );
#endif /* defined(__LP64__) */
	machine_thread_group_deinit(tg);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
	zfree(tg_zone, tg);
}
580 
581 /*
582  * Drop a reference to specified thread group
583  */
/* Drop a reference; the thread dropping the last one performs the teardown inline. */
void
thread_group_release(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		thread_group_deallocate_complete(tg);
	}
}
591 
/* Drop a reference that the caller guarantees is not the last one. */
void
thread_group_release_live(struct thread_group *tg)
{
	os_ref_release_live(&tg->tg_refcount);
}
597 
/* Daemon-queue callback: finish the deferred teardown enqueued by
 * thread_group_deallocate_safe(). Runs from thread-call context. */
static void
thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &thread_group_deallocate_queue);
	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);

	thread_group_deallocate_complete(tg);
}
606 
/*
 * Drop a reference from a context where inline teardown is unsafe (e.g.
 * the thread lock is held — see the queue-init comment in
 * thread_group_init()); the final free is punted to the daemon queue.
 */
void
thread_group_deallocate_safe(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
		    MPSC_QUEUE_NONE);
	}
}
615 
616 /*
617  * Get thread's current thread group
618  */
inline struct thread_group *
thread_group_get(thread_t t)
{
	/* NOTE(review): no reference is taken — caller must keep t's group stable
	 * (e.g. via the thread lock); confirm against call sites. */
	return t->thread_group;
}
624 
/* A thread's home group is the one owned by its task's coalition. */
struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(get_threadtask(t));
}
630 
631 /*
632  * The thread group is resolved according to a hierarchy:
633  *
634  * 1) work interval specified group (explicit API)
635  * 2) Auto-join thread group (wakeup tracking for special work intervals)
636  * 3) bank voucher carried group (implicitly set)
637  * 4) Preadopt thread group (if any)
638  * 5) coalition default thread group (ambient)
639  *
640  * Returns true if the thread's thread group needs to be changed and resolving
641  * TG is passed through in-out param. See also
642  * thread_mark_thread_group_hierarchy_resolved and
643  * thread_set_resolved_thread_group
644  *
645  * Caller should have thread lock. Interrupts are disabled. Thread doesn't have
646  * to be self
647  */
static bool
thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
{
	struct thread_group *cur_tg, *tg;
	cur_tg = t->thread_group;

	/*
	 * Each assignment below overrides the previous one, so the effective
	 * precedence (highest last) is:
	 *   home < preadopt < bank voucher < auto-join / work interval
	 * matching the hierarchy in the comment above this function.
	 */
	tg = thread_group_get_home_group(t);

#if CONFIG_PREADOPT_TG
	if (t->preadopt_thread_group) {
		tg = t->preadopt_thread_group;
	}
#endif
	if (t->bank_thread_group) {
		tg = t->bank_thread_group;
	}

	/* Auto-join and work-interval groups are mutually exclusive sources,
	 * selected by the AUTO_JOIN sched flag. */
	if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
		if (t->auto_join_thread_group) {
			tg = t->auto_join_thread_group;
		}
	} else {
		if (t->work_interval_thread_group) {
			tg = t->work_interval_thread_group;
		}
	}

	*resolved_tg = tg;
	/* True when the caller needs to switch t->thread_group to *resolved_tg. */
	return tg != cur_tg;
}
678 
679 #if CONFIG_PREADOPT_TG
680 
681 /*
682  * This function is always called after the hierarchy has been resolved. The
683  * caller holds the thread lock
684  */
/*
 * Debug-only invariant check: after resolution, no reevaluation may be
 * pending, recomputing the hierarchy must be a no-op, and the current
 * group must be one of the known sources in the hierarchy.
 */
static inline void
thread_assert_has_valid_thread_group(thread_t t)
{
	__assert_only struct thread_group *home_tg = thread_group_get_home_group(t);

	assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);

	__assert_only struct thread_group *resolved_tg;
	assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);

	assert((t->thread_group == home_tg) ||
	    (t->thread_group == t->preadopt_thread_group) ||
	    (t->thread_group == t->bank_thread_group) ||
	    (t->thread_group == t->auto_join_thread_group) ||
	    (t->thread_group == t->work_interval_thread_group));
}
701 #endif
702 
703 /*
704  * This function is called when the thread group hierarchy on the thread_t is
705  * resolved and t->thread_group is the result of the hierarchy resolution. Once
706  * this has happened, there is state that needs to be cleared up which is
707  * handled by this function.
708  *
709  * Prior to this call, we should have either
710  * a) Resolved the hierarchy and discovered no change needed
711  * b) Resolved the hierarchy and modified the t->thread_group
712  */
static void
thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
{
#if CONFIG_PREADOPT_TG
	/*
	 * We have just reevaluated the thread's hierarchy so we don't need to do it
	 * again later.
	 */
	thread_clear_reevaluate_tg_hierarchy_locked(t);

	/*
	 * Clear the old_preadopt_thread_group field whose sole purpose was to make
	 * sure that t->thread_group didn't have a dangling pointer.
	 */
	thread_assert_has_valid_thread_group(t);

	if (t->old_preadopt_thread_group) {
		/* Safe (deferred) release: this path can run with the thread lock held. */
		thread_group_deallocate_safe(t->old_preadopt_thread_group);
		t->old_preadopt_thread_group = NULL;
	}
#endif
}
735 
736 /*
737  * Called with thread lock held, always called on self.  This function simply
738  * moves the thread to the right clutch scheduler bucket and informs CLPC of the
739  * change
740  */
static void
thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
    struct thread_group * __unused new_tg)
{
	assert(current_thread() == t);
	assert(old_tg != new_tg);
	assert(t->thread_group == new_tg);

	/* Tell CLPC the running thread's group changed, as of "now". */
	uint64_t ctime = mach_approximate_time();
	uint64_t arg1, arg2;
	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
}
754 
755 /*
756  * Called on any thread with thread lock. Updates the thread_group field on the
757  * thread with the resolved thread group and always make necessary clutch
758  * scheduler callouts. If the thread group is being modified on self,
759  * then also make necessary CLPC callouts.
760  */
static void
thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
    struct thread_group *resolved_tg, bool on_self)
{
	t->thread_group = resolved_tg;

	/* Thread is either running already or is runnable but not on a runqueue */
	assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
	assert(t->runq == PROCESSOR_NULL);

	struct thread_group *home_tg = thread_group_get_home_group(t);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    thread_group_id(old_tg), thread_group_id(resolved_tg),
	    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

#if CONFIG_PREADOPT_TG
	/* Extra tracepoint when the resolution picked the preadopted group. */
	if (resolved_tg == t->preadopt_thread_group) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
	}
#endif

#if CONFIG_SCHED_CLUTCH
	/* Move the thread between the old and new groups' clutches. */
	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
	sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
	}
#endif

	/* CLPC callouts are only made when the change is applied to self. */
	if (on_self) {
		assert(t == current_thread());
		thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
	}

	thread_mark_thread_group_hierarchy_resolved(t);
}
799 
800 /* Caller has thread lock. Always called on self */
static void
thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
{
	assert(current_thread() == t);

#if CONFIG_PREADOPT_TG
	/* Optionally strip the preadopted group before resolving; its reference
	 * is released only after resolution completes (see below). */
	struct thread_group *preadopt_tg = NULL;
	if (clear_preadopt) {
		if (t->preadopt_thread_group) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);

			preadopt_tg = t->preadopt_thread_group;
			t->preadopt_thread_group = NULL;
		}
	}
#endif

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		/* on_self = true: also performs the CLPC callouts. */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
	}

	/*
	 * Regardless of whether we modified the t->thread_group above or not, the
	 * hierarchy is now resolved
	 */
	thread_mark_thread_group_hierarchy_resolved(t);

#if CONFIG_PREADOPT_TG
	/* Deferred release: the thread lock is held on this path. */
	if (preadopt_tg) {
		thread_group_deallocate_safe(preadopt_tg);
	}
#endif
}
839 
840 /*
841  * Caller has thread lock, never called on self, always called on a thread not
842  * on a runqueue. This is called from sched_prim.c. Counter part for calling on
843  * self is thread_resolve_thread_group_hierarchy_self
844  */
845 #if CONFIG_PREADOPT_TG
void
thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
{
	assert(t != current_thread());
	assert(t->runq == NULL);

	/* Only act if a previous update deferred the reevaluation. */
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		struct thread_group *resolved_tg = NULL;

		bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
		if (needs_change) {
			struct thread_group *old_tg = t->thread_group;
			/* on_self = false: t is not the running thread, no CLPC callouts. */
			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
		}

		/*
		 * Regardless of whether we modified the t->thread_group above or not,
		 * the hierarchy is now resolved
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
	}
}
868 #endif
869 
870 #if CONFIG_PREADOPT_TG
871 /*
872  * The thread being passed can be the current thread and it can also be another
873  * thread which is running on another core. This function is called with spin
874  * locks held (kq and wq lock) but the thread lock is not held by caller.
875  *
876  * The thread always takes a +1 on the thread group and will release the
877  * previous preadoption thread group's reference or stash it.
878  */
void
thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
{
	spl_t s = splsched();
	thread_lock(t);

	/*
	 * Assert that this is never called on WindowServer when it has already
	 * issued a block callout to CLPC.
	 *
	 * This should never happen because we don't ever call
	 * thread_set_preadopt_thread_group on a servicer after going out to
	 * userspace unless we are doing so to/after an unbind
	 */
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	struct thread_group *old_tg = t->thread_group;
	struct thread_group *home_tg = thread_group_get_home_group(t);

	/*
	 * Since the preadoption thread group can disappear from under you, we need
	 * to make sure that the thread_group pointer is always pointing to valid
	 * memory.
	 *
	 * We run the risk of the thread group pointer pointing to dangling memory
	 * when the following happens:
	 *
	 * a) We update the preadopt_thread_group
	 * b) We resolve hierarchy and need to change the resolved_thread_group
	 * c) For some reason, we are not able to do so and we need to set the
	 * resolved thread group later.
	 */

	/* take the ref from the thread */
	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;

	if (tg == NULL) {
		/* Clearing: the old preadopt ref (if any) is dropped below */
		t->preadopt_thread_group = NULL;
		if (old_preadopt_tg != NULL) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
		}
	} else {
		/* The thread always holds a +1 on its preadoption thread group */
		t->preadopt_thread_group = thread_group_retain(tg);
	}

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
	if (!needs_change) {
		/*
		 * Setting preadoption thread group didn't change anything, simply mark
		 * the hierarchy as resolved and exit.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
		goto out;
	}

	if (t != current_thread()) {
		/*
		 * We're modifying the thread group of another thread, we need to take
		 * action according to the state of the other thread.
		 *
		 * If the thread is runnable and not yet running, try removing it from
		 * the runq, modify it's TG and then reinsert it for reevaluation. If it
		 * isn't runnable (already running or started running concurrently, or
		 * if it is waiting), then mark a bit having the thread reevaluate its
		 * own hierarchy the next time it is being inserted into a runq
		 */
		if ((t->state & TH_RUN) && (t->runq != PROCESSOR_NULL)) {
			/* Thread is runnable but not running */

			bool removed_from_runq = thread_run_queue_remove(t);
			if (removed_from_runq) {
				thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

				/* Put it back so the scheduler sees the new group */
				thread_run_queue_reinsert(t, SCHED_TAILQ);
			} else {
				/*
				 * We failed to remove it from the runq - it probably started
				 * running, let the thread reevaluate the next time it gets
				 * enqueued on a runq
				 */
				thread_set_reevaluate_tg_hierarchy_locked(t);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
			}
		} else {
			/*
			 * The thread is not runnable or it is running already - let the
			 * thread reevaluate the next time it gets enqueued on a runq
			 */
			thread_set_reevaluate_tg_hierarchy_locked(t);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
		}
	} else {
		/* We're modifying thread group on ourselves */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(tg),
		    thread_tid(t), thread_group_id(home_tg));
	}

out:
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		assert(t->thread_group == old_tg);
		/*
		 * We need to reevaluate TG hierarchy later as a result of this
		 * `thread_set_preadopt_thread_group` operation. This means that the
		 * thread group on the thread was pointing to either the home thread
		 * group, the preadoption thread group we just replaced, or the old
		 * preadoption thread group stashed on the thread.
		 */
		assert(t->thread_group == home_tg ||
		    t->thread_group == old_preadopt_tg ||
		    t->old_preadopt_thread_group);

		if (t->thread_group == old_preadopt_tg) {
			/*
			 * t->thread_group is pointing to the preadopt thread group we just
			 * replaced. This means the hierarchy was resolved before this call.
			 * Assert that there was no old_preadopt_thread_group on the thread.
			 */
			assert(t->old_preadopt_thread_group == NULL);
			/*
			 * Since t->thread_group is still pointing to the old preadopt thread
			 * group - we need to keep it alive until we reevaluate the hierarchy
			 * next
			 */
			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
		} else if (old_preadopt_tg != NULL) {
			/* Nothing points at it anymore - safe to drop the old +1 */
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	} else {
		/* We resolved the hierarchy just now */
		thread_assert_has_valid_thread_group(t);

		/*
		 * We don't need the old preadopt thread group that we stashed in our
		 * local variable, drop it.
		 */
		if (old_preadopt_tg) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	}
	thread_unlock(t);
	splx(s);
	return;
}
1037 
1038 #endif
1039 
1040 /*
1041  * thread_set_thread_group()
1042  *
1043  * Caller must guarantee lifetime of the thread group for the life of the call -
1044  * this overrides the thread group without going through the hierarchy
1045  * resolution. This is for special thread groups like the VM and IO thread
1046  * groups only.
1047  */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	spl_t s = splsched();
	old_tg = t->thread_group;

	/* No-op if the thread is already in the requested group */
	if (old_tg != tg) {
		thread_lock(t);

		/* Servicer must not have an outstanding CLPC block callout */
		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
		/*
		 * Direct override - bypasses hierarchy resolution; caller guarantees
		 * the lifetime of tg (see header comment above).
		 */
		t->thread_group = tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(tg),
		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

		/* Perform the group-change perfcontrol callout for the thread */
		thread_notify_thread_group_change_self(t, old_tg, tg);

		thread_unlock(t);
	}

	splx(s);
}
1074 
1075 /* Called without the thread lock held, called on current thread */
void
thread_group_set_bank(thread_t t, struct thread_group *tg)
{
	/* Must be called on self, without the thread lock held */
	assert(current_thread() == t);
	/* boot arg disables groups in bank */
	if (tg_set_by_bankvoucher == FALSE) {
		return;
	}

	spl_t s = splsched();
	thread_lock(t);

	/* This is a borrowed reference from the current bank voucher */
	t->bank_thread_group = tg;

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	/* Adopting a bank group (tg != NULL) also clears any preadoption group */
	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}
1097 
1098 #if CONFIG_SCHED_AUTO_JOIN
1099 /*
1100  * thread_group_set_autojoin_thread_group_locked()
1101  *
1102  * Sets the thread group of a thread based on auto-join rules and reevaluates
1103  * the hierarchy.
1104  *
1105  * Preconditions:
1106  * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
1107  * - Thread must be locked by the caller already
1108  */
void
thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
{
	/* Preconditions (see block comment above): locked, not on a runq */
	assert(t->runq == PROCESSOR_NULL);

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	t->auto_join_thread_group = tg;

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		struct thread_group *home_tg = thread_group_get_home_group(t);

		/* Install directly - caller already holds the thread lock */
		t->thread_group = resolved_tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
		/*
		 * If the thread group is being changed for the current thread, callout
		 * to CLPC to update the thread's information at that layer. This makes
		 * sure CLPC has consistent state when the current thread is going
		 * off-core.
		 *
		 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
		 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self)
		 */
		if (t == current_thread()) {
			uint64_t ctime = mach_approximate_time();
			uint64_t arg1, arg2;
			machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
			machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
		}
	}

	/* Hierarchy is resolved whether or not the group actually changed */
	thread_mark_thread_group_hierarchy_resolved(t);
}
1148 #endif
1149 
1150 /* Thread is not locked. Thread is self */
void
thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
{
	assert(current_thread() == t);
	/* Auto-join and work-interval groups are mutually exclusive here */
	assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));

	/*
	 * We have a work interval, we don't need the preadoption thread group
	 * anymore (ie, it shouldn't be available for us to jump back to it after
	 * the thread leaves the work interval)
	 */
	spl_t s = splsched();
	thread_lock(t);

	t->work_interval_thread_group = tg;
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	/* tg != NULL: joining a work interval drops the preadoption group */
	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}
1173 
1174 inline cluster_type_t
thread_group_recommendation(struct thread_group * tg)1175 thread_group_recommendation(struct thread_group *tg)
1176 {
1177 	if (tg == NULL) {
1178 		return CLUSTER_TYPE_SMP;
1179 	} else {
1180 		return tg->tg_recommendation;
1181 	}
1182 }
1183 
1184 inline uint64_t
thread_group_get_id(struct thread_group * tg)1185 thread_group_get_id(struct thread_group *tg)
1186 {
1187 	return tg->tg_id;
1188 }
1189 
1190 uint32_t
thread_group_count(void)1191 thread_group_count(void)
1192 {
1193 	return tg_count;
1194 }
1195 
1196 /*
1197  * Can only be called while tg cannot be destroyed
1198  */
1199 inline const char*
thread_group_get_name(struct thread_group * tg)1200 thread_group_get_name(struct thread_group *tg)
1201 {
1202 	return tg->tg_name;
1203 }
1204 
1205 inline void *
thread_group_get_machine_data(struct thread_group * tg)1206 thread_group_get_machine_data(struct thread_group *tg)
1207 {
1208 	return &tg->tg_machine_data;
1209 }
1210 
1211 inline uint32_t
thread_group_machine_data_size(void)1212 thread_group_machine_data_size(void)
1213 {
1214 	return tg_machine_data_size;
1215 }
1216 
1217 inline boolean_t
thread_group_uses_immediate_ipi(struct thread_group * tg)1218 thread_group_uses_immediate_ipi(struct thread_group *tg)
1219 {
1220 	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
1221 }
1222 
1223 kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout,void * arg)1224 thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
1225 {
1226 	struct thread_group *tg;
1227 	int i = 0;
1228 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
1229 		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
1230 			return KERN_FAILURE;
1231 		}
1232 		callout(arg, i, tg);
1233 		i++;
1234 	}
1235 	return KERN_SUCCESS;
1236 }
1237 
1238 void
thread_group_join_io_storage(void)1239 thread_group_join_io_storage(void)
1240 {
1241 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
1242 	assert(tg != NULL);
1243 	thread_set_thread_group(current_thread(), tg);
1244 }
1245 
1246 void
thread_group_join_perf_controller(void)1247 thread_group_join_perf_controller(void)
1248 {
1249 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
1250 	assert(tg != NULL);
1251 	thread_set_thread_group(current_thread(), tg);
1252 }
1253 
1254 void
thread_group_vm_add(void)1255 thread_group_vm_add(void)
1256 {
1257 	assert(tg_vm != NULL);
1258 	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
1259 }
1260 
1261 uint32_t
thread_group_get_flags(struct thread_group * tg)1262 thread_group_get_flags(struct thread_group *tg)
1263 {
1264 	return tg->tg_flags;
1265 }
1266 
1267 /*
1268  * Returns whether the thread group is restricted to the E-cluster when CLPC is
1269  * turned off.
1270  */
1271 boolean_t
thread_group_smp_restricted(struct thread_group * tg)1272 thread_group_smp_restricted(struct thread_group *tg)
1273 {
1274 	if (tg->tg_flags & THREAD_GROUP_FLAGS_SMP_RESTRICT) {
1275 		return true;
1276 	} else {
1277 		return false;
1278 	}
1279 }
1280 
/* Publish a new cluster recommendation for the group. */
void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
1291 
1292 #if CONFIG_SCHED_EDGE
1293 
/*
 * Edge-scheduler tunables: when set (default), the UT / BG QoS buckets of a
 * thread group keep preferred cluster 0 regardless of the CLPC
 * recommendation (see sched_perfcontrol_thread_group_recommend below).
 */
int sched_edge_restrict_ut = 1;
int sched_edge_restrict_bg = 1;
1296 
1297 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1298 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1299 {
1300 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1301 	/*
1302 	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
1303 	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
1304 	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
1305 	 */
1306 	assert(new_recommendation != CLUSTER_TYPE_SMP);
1307 	/*
1308 	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until the new CLPC
1309 	 * routine is being called, fake out the call from the old CLPC interface.
1310 	 */
1311 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
1312 	/*
1313 	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
1314 	 */
1315 	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
1316 		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1317 	}
1318 	/* For UT & BG QoS, set the recommendation only if they havent been restricted via sysctls */
1319 	if (!sched_edge_restrict_ut) {
1320 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1321 	}
1322 	if (!sched_edge_restrict_bg) {
1323 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1324 	}
1325 	sched_perfcontrol_preferred_cluster_options_t options = 0;
1326 	if (new_recommendation == CLUSTER_TYPE_P) {
1327 		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
1328 	}
1329 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1330 }
1331 
/* CLPC entry point: read the scheduler's edge weight matrix. */
void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	/* Thin wrapper; the Edge scheduler owns the matrix */
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}
1337 
/* CLPC entry point: update the scheduler's edge weight matrix. */
void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	/* Thin wrapper; the Edge scheduler owns the matrix */
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}
1343 
1344 void
sched_perfcontrol_thread_group_preferred_clusters_set(void * machine_data,uint32_t tg_preferred_cluster,uint32_t overrides[PERFCONTROL_CLASS_MAX],sched_perfcontrol_preferred_cluster_options_t options)1345 sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
1346     uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
1347 {
1348 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1349 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
1350 		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
1351 		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1352 		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1353 		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
1354 		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
1355 		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
1356 	};
1357 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1358 }
1359 
1360 #else /* CONFIG_SCHED_EDGE */
1361 
1362 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1363 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1364 {
1365 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1366 	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
1367 }
1368 
/* Edge matrix is only meaningful under CONFIG_SCHED_EDGE; no-op here. */
void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}
1373 
/* Edge matrix is only meaningful under CONFIG_SCHED_EDGE; no-op here. */
void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}
1378 
/* Per-bucket preferred clusters only exist under CONFIG_SCHED_EDGE; no-op. */
void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}
1384 
1385 #endif /* CONFIG_SCHED_EDGE */
1386 
1387 /*
1388  * Can only be called while tg cannot be destroyed.
1389  * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
1390  */
1391 const char*
sched_perfcontrol_thread_group_get_name(void * machine_data)1392 sched_perfcontrol_thread_group_get_name(void *machine_data)
1393 {
1394 	struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
1395 	return thread_group_get_name(tg);
1396 }
1397 
1398 #endif /* CONFIG_THREAD_GROUPS */
1399