xref: /xnu-8019.80.24/osfmk/kern/thread_group.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea) !
1 /*
2  * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <kern/kern_types.h>
31 #include <kern/processor.h>
32 #include <kern/thread.h>
33 #include <kern/zalloc.h>
34 #include <kern/task.h>
35 #include <kern/machine.h>
36 #include <kern/coalition.h>
37 #include <sys/errno.h>
38 #include <kern/queue.h>
39 #include <kern/locks.h>
40 #include <kern/thread_group.h>
41 #include <kern/sched_clutch.h>
42 
43 #if CONFIG_THREAD_GROUPS
44 
45 #define TG_MACHINE_DATA_ALIGN_SIZE (16)
46 
/*
 * A thread group is the unit of scheduler (clutch) and CLPC accounting for a
 * set of related threads.  Lifetime is reference counted; the structure is
 * also linked on the global tg_queue so CLPC state can be resynced.
 */
struct thread_group {
	uint64_t                tg_id;                          /* unique id, assigned under tg_lock; monotonically increasing */
	char                    tg_name[THREAD_GROUP_MAXNAME];  /* set at most once via thread_group_set_name() */
	struct os_refcnt        tg_refcount;
	struct {
		uint32_t                tg_flags;           /* THREAD_GROUP_FLAGS_*; protected by tg_flags_update_lock */
		cluster_type_t          tg_recommendation;  /* cluster recommendation (CLUSTER_TYPE_SMP until CLPC says otherwise) */
	};
	/* We make the mpsc destroy chain link a separate field here because while
	 * refs = 0 and the thread group is enqueued on the daemon queue, CLPC
	 * (which does not hold an explicit ref) is still under the assumption that
	 * this thread group is alive and may provide recommendation changes/updates
	 * to it. As such, we need to make sure that all parts of the thread group
	 * structure are valid.
	 */
	struct mpsc_queue_chain tg_destroy_link;
	queue_chain_t           tg_queue_chain;  /* linkage on the global tg_queue; protected by tg_lock */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch     tg_sched_clutch; /* clutch scheduler state; lifetime tied to the group */
#endif /* CONFIG_SCHED_CLUTCH */
	/* Opaque machine/CLPC data; its size (tg_machine_data_size) is fixed at boot. */
	uint8_t                 tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
} __attribute__((aligned(8)));
69 
/* Zone all thread_group structures are allocated from; sized in thread_group_init(). */
static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
static uint32_t tg_count;           /* number of live thread groups; protected by tg_lock */
static queue_head_t tg_queue;       /* global list of all thread groups; protected by tg_lock */
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
/* Protects tg_queue, tg_count and tg_id assignment. */
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
/* Serializes flag updates on all thread groups and the CLPC callouts they make. */
static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp);

static uint64_t tg_next_id = 0;     /* next tg_id to hand out; protected by tg_lock */
static uint32_t tg_size;            /* total allocation size: struct + machine data, aligned */
static uint32_t tg_machine_data_size; /* bytes of per-group machine/CLPC data (EDT/boot-arg tunable) */
static uint32_t perf_controller_thread_group_immediate_ipi; /* EDT/boot-arg tunable */
/* Well-known kernel thread groups created at boot (each holds a permanent ref). */
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
/* Whether adopting a bank voucher may change a thread's group (EDT/boot-arg tunable). */
int tg_set_by_bankvoucher;

static bool thread_group_retain_try(struct thread_group *tg);

/* Daemon queue on which final deallocation runs when release happens in a
 * context that cannot safely tear the group down inline (see _safe variant). */
static struct mpsc_daemon_queue thread_group_deallocate_queue;
static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq);
93 
94 /*
95  * Initialize thread groups at boot
96  */
97 void
thread_group_init(void)98 thread_group_init(void)
99 {
100 	// Get thread group structure extension from EDT or boot-args (which can override EDT)
101 	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
102 		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
103 			tg_machine_data_size = 8;
104 		}
105 	}
106 
107 	if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
108 		if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
109 			perf_controller_thread_group_immediate_ipi = 0;
110 		}
111 	}
112 
113 	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
114 	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
115 		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
116 			tg_set_by_bankvoucher = 1;
117 		}
118 	}
119 
120 	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
121 	if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
122 		tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
123 	}
124 	tg_machine_data_size = tg_size - sizeof(struct thread_group);
125 	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
126 	assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
127 	tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);
128 
129 	queue_head_init(tg_queue);
130 	tg_system = thread_group_create_and_retain(FALSE);
131 	thread_group_set_name(tg_system, "system");
132 	tg_background = thread_group_create_and_retain(FALSE);
133 	thread_group_set_name(tg_background, "background");
134 	lck_mtx_lock(&tg_lock);
135 	tg_next_id++;  // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
136 	lck_mtx_unlock(&tg_lock);
137 	tg_vm = thread_group_create_and_retain(FALSE);
138 	thread_group_set_name(tg_vm, "VM");
139 	tg_io_storage = thread_group_create_and_retain(FALSE);
140 	thread_group_set_name(tg_io_storage, "io storage");
141 	tg_perf_controller = thread_group_create_and_retain(FALSE);
142 	thread_group_set_name(tg_perf_controller, "perf_controller");
143 
144 	/*
145 	 * If CLPC is disabled, it would recommend SMP for all thread groups.
146 	 * In that mode, the scheduler would like to restrict the kernel thread
147 	 * groups to the E-cluster while all other thread groups are run on the
148 	 * P-cluster. To identify the kernel thread groups, mark them with a
149 	 * special flag THREAD_GROUP_FLAGS_SMP_RESTRICT which is looked at by
150 	 * recommended_pset_type().
151 	 */
152 	tg_system->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
153 	tg_vm->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
154 	tg_io_storage->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
155 	tg_perf_controller->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
156 
157 	/*
158 	 * The thread group deallocation queue must be a thread call based queue
159 	 * because it is woken up from contexts where the thread lock is held. The
160 	 * only way to perform wakeups safely in those contexts is to wakeup a
161 	 * thread call which is guaranteed to be on a different waitq and would
162 	 * not hash onto the same global waitq which might be currently locked.
163 	 */
164 	mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
165 	    thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL);
166 }
167 
168 #if CONFIG_SCHED_CLUTCH
169 /*
170  * sched_clutch_for_thread
171  *
172  * The routine provides a back linkage from the thread to the
173  * sched_clutch it belongs to. This relationship is based on the
174  * thread group membership of the thread. Since that membership is
175  * changed from the thread context with the thread lock held, this
176  * linkage should be looked at only with the thread lock held or
177  * when the thread cannot be running (for eg. the thread is in the
178  * runq and being removed as part of thread_select().
179  */
180 sched_clutch_t
sched_clutch_for_thread(thread_t thread)181 sched_clutch_for_thread(thread_t thread)
182 {
183 	assert(thread->thread_group != NULL);
184 	return &(thread->thread_group->tg_sched_clutch);
185 }
186 
187 sched_clutch_t
sched_clutch_for_thread_group(struct thread_group * thread_group)188 sched_clutch_for_thread_group(struct thread_group *thread_group)
189 {
190 	return &(thread_group->tg_sched_clutch);
191 }
192 
193 /*
194  * Translate the TG flags to a priority boost for the sched_clutch.
195  * This priority boost will apply to the entire clutch represented
196  * by the thread group.
197  */
198 static void
sched_clutch_update_tg_flags(sched_clutch_t clutch,uint8_t flags)199 sched_clutch_update_tg_flags(sched_clutch_t clutch, uint8_t flags)
200 {
201 	sched_clutch_tg_priority_t sc_tg_pri = 0;
202 	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
203 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
204 	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
205 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
206 	} else {
207 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
208 	}
209 	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
210 }
211 
212 #endif /* CONFIG_SCHED_CLUTCH */
213 
214 uint64_t
thread_group_id(struct thread_group * tg)215 thread_group_id(struct thread_group *tg)
216 {
217 	return (tg == NULL) ? 0 : tg->tg_id;
218 }
219 
#if CONFIG_PREADOPT_TG
/*
 * Accessors for the "hierarchy needs re-resolution" marker on a thread.
 * All three require the thread lock (they touch t->sched_flags).
 */
static inline bool
thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
{
	return t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}

static inline void
thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags |= TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}

static inline void
thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags &= ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
#endif
239 
240 /*
241  * Use a spinlock to protect all thread group flag updates.
242  * The lock should not have heavy contention since these flag updates should
243  * be infrequent. If this lock has contention issues, it should be changed to
244  * a per thread-group lock.
245  *
246  * The lock protects the flags field in the thread_group structure. It is also
247  * held while doing callouts to CLPC to reflect these flag changes.
248  */
249 
/* Acquire the global flags-update lock (see locking comment above). */
void
thread_group_flags_update_lock(void)
{
	lck_mtx_lock(&tg_flags_update_lock);
}

/* Release the global flags-update lock. */
void
thread_group_flags_update_unlock(void)
{
	lck_mtx_unlock(&tg_flags_update_lock);
}
261 
262 /*
263  * Inform platform code about already existing thread groups
264  * or ask it to free state for all thread groups
265  */
266 void
thread_group_resync(boolean_t create)267 thread_group_resync(boolean_t create)
268 {
269 	struct thread_group *tg;
270 
271 	thread_group_flags_update_lock();
272 	lck_mtx_lock(&tg_lock);
273 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
274 		if (create) {
275 			machine_thread_group_init(tg);
276 		} else {
277 			machine_thread_group_deinit(tg);
278 		}
279 	}
280 	lck_mtx_unlock(&tg_lock);
281 	thread_group_flags_update_unlock();
282 }
283 
284 /*
285  * Create new thread group and add new reference to it.
286  */
287 struct thread_group *
thread_group_create_and_retain(boolean_t efficient)288 thread_group_create_and_retain(boolean_t efficient)
289 {
290 	struct thread_group *tg;
291 
292 	tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
293 	assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);
294 
295 	if (efficient) {
296 		tg->tg_flags |= THREAD_GROUP_FLAGS_EFFICIENT;
297 	}
298 
299 #if CONFIG_SCHED_CLUTCH
300 	/*
301 	 * The clutch scheduler maintains a bunch of runqs per thread group. For
302 	 * each thread group it maintains a sched_clutch structure. The lifetime
303 	 * of that structure is tied directly to the lifetime of the thread group.
304 	 */
305 	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);
306 
307 	/*
308 	 * Since the thread group flags are used to determine any priority promotions
309 	 * for the threads in the thread group, initialize them now.
310 	 */
311 	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
312 
313 #endif /* CONFIG_SCHED_CLUTCH */
314 
315 	lck_mtx_lock(&tg_lock);
316 	tg->tg_id = tg_next_id++;
317 	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
318 	os_ref_init(&tg->tg_refcount, NULL);
319 	tg_count++;
320 	enqueue_tail(&tg_queue, &tg->tg_queue_chain);
321 
322 	// call machine layer init before this thread group becomes visible
323 	machine_thread_group_init(tg);
324 	lck_mtx_unlock(&tg_lock);
325 
326 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));
327 
328 	return tg;
329 }
330 
331 /*
332  * Point newly created thread to its home thread group
333  */
334 void
thread_group_init_thread(thread_t t,task_t task)335 thread_group_init_thread(thread_t t, task_t task)
336 {
337 	struct thread_group *tg = task_coalition_get_thread_group(task);
338 	t->thread_group = tg;
339 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
340 	    THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
341 }
342 
343 /*
344  * Set thread group name
345  */
346 void
thread_group_set_name(__unused struct thread_group * tg,__unused const char * name)347 thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
348 {
349 	if (name == NULL) {
350 		return;
351 	}
352 	if (!thread_group_retain_try(tg)) {
353 		return;
354 	}
355 	if (tg->tg_name[0] == '\0') {
356 		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
357 #if defined(__LP64__)
358 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
359 		    tg->tg_id,
360 		    *(uint64_t*)(void*)&tg->tg_name[0],
361 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
362 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
363 		    );
364 #else /* defined(__LP64__) */
365 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
366 		    tg->tg_id,
367 		    *(uint32_t*)(void*)&tg->tg_name[0],
368 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
369 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
370 		    );
371 #endif /* defined(__LP64__) */
372 	}
373 	thread_group_release(tg);
374 }
375 
/* Set flags on a thread group, taking the global flags-update lock. */
void
thread_group_set_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
383 
/* Clear flags on a thread group, taking the global flags-update lock. */
void
thread_group_clear_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
391 
392 /*
393  * Set thread group flags and perform related actions.
394  * The tg_flags_update_lock should be held.
395  * Currently supported flags are:
396  * - THREAD_GROUP_FLAGS_EFFICIENT
397  * - THREAD_GROUP_FLAGS_UI_APP
398  */
399 
400 void
thread_group_set_flags_locked(struct thread_group * tg,uint64_t flags)401 thread_group_set_flags_locked(struct thread_group *tg, uint64_t flags)
402 {
403 	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
404 		panic("thread_group_set_flags: Invalid flags %llu", flags);
405 	}
406 
407 	if ((tg->tg_flags & flags) == flags) {
408 		return;
409 	}
410 
411 	__kdebug_only uint64_t old_flags = tg->tg_flags;
412 	tg->tg_flags |= flags;
413 	machine_thread_group_flags_update(tg, tg->tg_flags);
414 #if CONFIG_SCHED_CLUTCH
415 	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
416 #endif /* CONFIG_SCHED_CLUTCH */
417 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
418 	    tg->tg_id, tg->tg_flags, old_flags);
419 }
420 
421 /*
422  * Clear thread group flags and perform related actions
423  * The tg_flags_update_lock should be held.
424  * Currently supported flags are:
425  * - THREAD_GROUP_FLAGS_EFFICIENT
426  * - THREAD_GROUP_FLAGS_UI_APP
427  */
428 
429 void
thread_group_clear_flags_locked(struct thread_group * tg,uint64_t flags)430 thread_group_clear_flags_locked(struct thread_group *tg, uint64_t flags)
431 {
432 	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
433 		panic("thread_group_clear_flags: Invalid flags %llu", flags);
434 	}
435 
436 	if ((tg->tg_flags & flags) == 0) {
437 		return;
438 	}
439 
440 	__kdebug_only uint64_t old_flags = tg->tg_flags;
441 	tg->tg_flags &= ~flags;
442 #if CONFIG_SCHED_CLUTCH
443 	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
444 #endif /* CONFIG_SCHED_CLUTCH */
445 	machine_thread_group_flags_update(tg, tg->tg_flags);
446 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
447 	    tg->tg_id, tg->tg_flags, old_flags);
448 }
449 
450 
451 
452 /*
453  * Find thread group with specified name and put new reference to it.
454  */
455 struct thread_group *
thread_group_find_by_name_and_retain(char * name)456 thread_group_find_by_name_and_retain(char *name)
457 {
458 	struct thread_group *result = NULL;
459 
460 	if (name == NULL) {
461 		return NULL;
462 	}
463 
464 	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
465 		return thread_group_retain(tg_system);
466 	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
467 		return thread_group_retain(tg_background);
468 	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
469 		return thread_group_retain(tg_perf_controller);
470 	}
471 
472 	struct thread_group *tg;
473 	lck_mtx_lock(&tg_lock);
474 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
475 		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
476 		    thread_group_retain_try(tg)) {
477 			result = tg;
478 			break;
479 		}
480 	}
481 	lck_mtx_unlock(&tg_lock);
482 	return result;
483 }
484 
485 /*
486  * Find thread group with specified ID and add new reference to it.
487  */
488 struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)489 thread_group_find_by_id_and_retain(uint64_t id)
490 {
491 	struct thread_group *tg = NULL;
492 	struct thread_group *result = NULL;
493 
494 	switch (id) {
495 	case THREAD_GROUP_SYSTEM:
496 		result = tg_system;
497 		thread_group_retain(tg_system);
498 		break;
499 	case THREAD_GROUP_BACKGROUND:
500 		result = tg_background;
501 		thread_group_retain(tg_background);
502 		break;
503 	case THREAD_GROUP_VM:
504 		result = tg_vm;
505 		thread_group_retain(tg_vm);
506 		break;
507 	case THREAD_GROUP_IO_STORAGE:
508 		result = tg_io_storage;
509 		thread_group_retain(tg_io_storage);
510 		break;
511 	case THREAD_GROUP_PERF_CONTROLLER:
512 		result = tg_perf_controller;
513 		thread_group_retain(tg_perf_controller);
514 		break;
515 	default:
516 		lck_mtx_lock(&tg_lock);
517 		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
518 			if (tg->tg_id == id && thread_group_retain_try(tg)) {
519 				result = tg;
520 				break;
521 			}
522 		}
523 		lck_mtx_unlock(&tg_lock);
524 	}
525 	return result;
526 }
527 
528 /*
529  * Add new reference to specified thread group
530  */
531 struct thread_group *
thread_group_retain(struct thread_group * tg)532 thread_group_retain(struct thread_group *tg)
533 {
534 	os_ref_retain(&tg->tg_refcount);
535 	return tg;
536 }
537 
538 /*
539  * Similar to thread_group_retain, but fails for thread groups with a
540  * zero reference count. Returns true if retained successfully.
541  */
542 static bool
thread_group_retain_try(struct thread_group * tg)543 thread_group_retain_try(struct thread_group *tg)
544 {
545 	return os_ref_retain_try(&tg->tg_refcount);
546 }
547 
/*
 * Final teardown of a thread group whose refcount reached zero: unlink it
 * from the global list, emit the name/free tracepoints, tear down machine
 * (CLPC) and clutch state, and free the allocation.  Must run in a context
 * where tg_lock may be taken (see thread_group_deallocate_safe for the
 * deferred path).
 */
static void
thread_group_deallocate_complete(struct thread_group *tg)
{
	lck_mtx_lock(&tg_lock);
	tg_count--;
	remqueue(&tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);
	/* The tracepoints below read the name as three raw words. */
	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint64_t*)(void*)&tg->tg_name[0],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
	    );
#else /* defined(__LP64__) */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint32_t*)(void*)&tg->tg_name[0],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
	    );
#endif /* defined(__LP64__) */
	machine_thread_group_deinit(tg);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
	zfree(tg_zone, tg);
}
579 
580 /*
581  * Drop a reference to specified thread group
582  */
583 void
thread_group_release(struct thread_group * tg)584 thread_group_release(struct thread_group *tg)
585 {
586 	if (os_ref_release(&tg->tg_refcount) == 0) {
587 		thread_group_deallocate_complete(tg);
588 	}
589 }
590 
/* Drop a reference that is known not to be the last one (asserts otherwise). */
void
thread_group_release_live(struct thread_group *tg)
{
	os_ref_release_live(&tg->tg_refcount);
}
596 
/* Daemon-queue callback: complete deferred teardown of a zero-ref group. */
static void
thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &thread_group_deallocate_queue);
	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);

	thread_group_deallocate_complete(tg);
}
605 
/*
 * Drop a reference from a context that cannot take tg_lock (e.g. with the
 * thread lock held): if this was the last reference, hand the group to the
 * deallocation daemon queue instead of tearing it down inline.
 */
void
thread_group_deallocate_safe(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
		    MPSC_QUEUE_NONE);
	}
}
614 
615 /*
616  * Get thread's current thread group
617  */
618 inline struct thread_group *
thread_group_get(thread_t t)619 thread_group_get(thread_t t)
620 {
621 	return t->thread_group;
622 }
623 
/* Return the thread's home group: the one owned by its task's coalition. */
struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(get_threadtask(t));
}
629 
630 /*
631  * The thread group is resolved according to a hierarchy:
632  *
633  * 1) work interval specified group (explicit API)
634  * 2) Auto-join thread group (wakeup tracking for special work intervals)
635  * 3) bank voucher carried group (implicitly set)
636  * 4) Preadopt thread group (if any)
637  * 5) coalition default thread group (ambient)
638  *
639  * Returns true if the thread's thread group needs to be changed and resolving
640  * TG is passed through in-out param. See also
641  * thread_mark_thread_group_hierarchy_resolved and
642  * thread_set_resolved_thread_group
643  *
644  * Caller should have thread lock. Interrupts are disabled. Thread doesn't have
645  * to be self
646  */
647 static bool
thread_compute_resolved_thread_group(thread_t t,struct thread_group ** resolved_tg)648 thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
649 {
650 	struct thread_group *cur_tg, *tg;
651 	cur_tg = t->thread_group;
652 
653 	tg = thread_group_get_home_group(t);
654 
655 #if CONFIG_PREADOPT_TG
656 	if (t->preadopt_thread_group) {
657 		tg = t->preadopt_thread_group;
658 	}
659 #endif
660 	if (t->bank_thread_group) {
661 		tg = t->bank_thread_group;
662 	}
663 
664 	if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
665 		if (t->auto_join_thread_group) {
666 			tg = t->auto_join_thread_group;
667 		}
668 	} else {
669 		if (t->work_interval_thread_group) {
670 			tg = t->work_interval_thread_group;
671 		}
672 	}
673 
674 	*resolved_tg = tg;
675 	return tg != cur_tg;
676 }
677 
#if CONFIG_PREADOPT_TG

/*
 * This function is always called after the hierarchy has been resolved. The
 * caller holds the thread lock
 */
static inline void
thread_assert_has_valid_thread_group(thread_t t)
{
	__assert_only struct thread_group *home_tg = thread_group_get_home_group(t);

	/* No pending re-resolution... */
	assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);

	/* ...and re-resolving now would not change t->thread_group... */
	__assert_only struct thread_group *resolved_tg;
	assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);

	/* ...and the current group is one of the hierarchy's candidates. */
	assert((t->thread_group == home_tg) ||
	    (t->thread_group == t->preadopt_thread_group) ||
	    (t->thread_group == t->bank_thread_group) ||
	    (t->thread_group == t->auto_join_thread_group) ||
	    (t->thread_group == t->work_interval_thread_group));
}
#endif
701 
702 /*
703  * This function is called when the thread group hierarchy on the thread_t is
704  * resolved and t->thread_group is the result of the hierarchy resolution. Once
705  * this has happened, there is state that needs to be cleared up which is
706  * handled by this function.
707  *
708  * Prior to this call, we should have either
709  * a) Resolved the hierarchy and discovered no change needed
710  * b) Resolved the hierarchy and modified the t->thread_group
711  */
712 static void
thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)713 thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
714 {
715 #if CONFIG_PREADOPT_TG
716 	/*
717 	 * We have just reevaluated the thread's hierarchy so we don't need to do it
718 	 * again later.
719 	 */
720 	thread_clear_reevaluate_tg_hierarchy_locked(t);
721 
722 	/*
723 	 * Clear the old_preadopt_thread_group field whose sole purpose was to make
724 	 * sure that t->thread_group didn't have a dangling pointer.
725 	 */
726 	thread_assert_has_valid_thread_group(t);
727 
728 	if (t->old_preadopt_thread_group) {
729 		thread_group_deallocate_safe(t->old_preadopt_thread_group);
730 		t->old_preadopt_thread_group = NULL;
731 	}
732 #endif
733 }
734 
735 /*
736  * Called with thread lock held, always called on self.  This function simply
737  * moves the thread to the right clutch scheduler bucket and informs CLPC of the
738  * change
739  */
740 static void
thread_notify_thread_group_change_self(thread_t t,struct thread_group * __unused old_tg,struct thread_group * __unused new_tg)741 thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
742     struct thread_group * __unused new_tg)
743 {
744 	assert(current_thread() == t);
745 	assert(old_tg != new_tg);
746 	assert(t->thread_group == new_tg);
747 
748 	uint64_t ctime = mach_approximate_time();
749 	uint64_t arg1, arg2;
750 	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
751 	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
752 }
753 
754 /*
755  * Called on any thread with thread lock. Updates the thread_group field on the
756  * thread with the resolved thread group and always make necessary clutch
757  * scheduler callouts. If the thread group is being modified on self,
758  * then also make necessary CLPC callouts.
759  */
760 static void
thread_set_resolved_thread_group(thread_t t,struct thread_group * old_tg,struct thread_group * resolved_tg,bool on_self)761 thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
762     struct thread_group *resolved_tg, bool on_self)
763 {
764 	t->thread_group = resolved_tg;
765 
766 	/* Thread is either running already or is runnable but not on a runqueue */
767 	assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
768 	assert(t->runq == PROCESSOR_NULL);
769 
770 	struct thread_group *home_tg = thread_group_get_home_group(t);
771 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
772 	    thread_group_id(old_tg), thread_group_id(resolved_tg),
773 	    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
774 
775 #if CONFIG_PREADOPT_TG
776 	if (resolved_tg == t->preadopt_thread_group) {
777 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
778 		    thread_group_id(old_tg), thread_group_id(resolved_tg),
779 		    thread_tid(t), thread_group_id(home_tg));
780 	}
781 #endif
782 
783 #if CONFIG_SCHED_CLUTCH
784 	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
785 	sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
786 	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
787 		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
788 	}
789 #endif
790 
791 	if (on_self) {
792 		assert(t == current_thread());
793 		thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
794 	}
795 
796 	thread_mark_thread_group_hierarchy_resolved(t);
797 }
798 
/* Caller has thread lock. Always called on self.
 *
 * Re-resolves the thread group hierarchy for the current thread, optionally
 * clearing the preadoption group first (its reference is dropped only after
 * the hierarchy has been re-resolved, so t->thread_group never dangles).
 */
static void
thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
{
	assert(current_thread() == t);

#if CONFIG_PREADOPT_TG
	struct thread_group *preadopt_tg = NULL;
	if (clear_preadopt) {
		if (t->preadopt_thread_group) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);

			/* Stash the old preadopt group; released after resolution below. */
			preadopt_tg = t->preadopt_thread_group;
			t->preadopt_thread_group = NULL;
		}
	}
#endif

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		/* on_self = true: also makes the CLPC going-on-core callouts. */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
	}

	/*
	 * Regardless of whether we modified the t->thread_group above or not, the
	 * hierarchy is now resolved
	 */
	thread_mark_thread_group_hierarchy_resolved(t);

#if CONFIG_PREADOPT_TG
	if (preadopt_tg) {
		thread_group_deallocate_safe(preadopt_tg);
	}
#endif
}
838 
839 /*
840  * Caller has thread lock, never called on self, always called on a thread not
841  * on a runqueue. This is called from sched_prim.c. Counter part for calling on
842  * self is thread_resolve_thread_group_hierarchy_self
843  */
844 #if CONFIG_PREADOPT_TG
845 void
thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)846 thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
847 {
848 	assert(t != current_thread());
849 	assert(t->runq == NULL);
850 
851 	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
852 		struct thread_group *resolved_tg = NULL;
853 
854 		bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
855 		if (needs_change) {
856 			struct thread_group *old_tg = t->thread_group;
857 			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
858 		}
859 
860 		/*
861 		 * Regardless of whether we modified the t->thread_group above or not,
862 		 * the hierarchy is now resolved
863 		 */
864 		thread_mark_thread_group_hierarchy_resolved(t);
865 	}
866 }
867 #endif
868 
869 #if CONFIG_PREADOPT_TG
870 /*
871  * The thread being passed can be the current thread and it can also be another
872  * thread which is running on another core. This function is called with spin
873  * locks held (kq and wq lock) but the thread lock is not held by caller.
874  *
875  * The thread always takes a +1 on the thread group and will release the
876  * previous preadoption thread group's reference or stash it.
877  */
void
thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
{
	spl_t s = splsched();
	thread_lock(t);

	/*
	 * Assert that this is never called on WindowServer when it has already
	 * issued a block callout to CLPC.
	 *
	 * This should never happen because we don't ever call
	 * thread_set_preadopt_thread_group on a servicer after going out to
	 * userspace unless we are doing so to/after an unbind
	 */
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	/* Currently resolved group and the thread's home group, for tracing below */
	struct thread_group *old_tg = t->thread_group;
	struct thread_group *home_tg = thread_group_get_home_group(t);

	/*
	 * Since the preadoption thread group can disappear from under you, we need
	 * to make sure that the thread_group pointer is always pointing to valid
	 * memory.
	 *
	 * We run the risk of the thread group pointer pointing to dangling memory
	 * when the following happens:
	 *
	 * a) We update the preadopt_thread_group
	 * b) We resolve hierarchy and need to change the resolved_thread_group
	 * c) For some reason, we are not able to do so and we need to set the
	 * resolved thread group later.
	 */

	/* take the ref from the thread */
	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;

	if (tg == NULL) {
		t->preadopt_thread_group = NULL;
		if (old_preadopt_tg != NULL) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
		}
	} else {
		/* +1 on tg, stashed on the thread as the new preadopt group */
		t->preadopt_thread_group = thread_group_retain(tg);
	}

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
	if (!needs_change) {
		/*
		 * Setting preadoption thread group didn't change anything, simply mark
		 * the hierarchy as resolved and exit.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
		goto out;
	}

	if (t != current_thread()) {
		/*
		 * We're modifying the thread group of another thread, we need to take
		 * action according to the state of the other thread.
		 *
		 * If the thread is runnable and not yet running, try removing it from
		 * the runq, modify it's TG and then reinsert it for reevaluation. If it
		 * isn't runnable (already running or started running concurrently, or
		 * if it is waiting), then mark a bit having the thread reevaluate its
		 * own hierarchy the next time it is being inserted into a runq
		 */
		if ((t->state & TH_RUN) && (t->runq != PROCESSOR_NULL)) {
			/* Thread is runnable but not running */

			bool removed_from_runq = thread_run_queue_remove(t);
			if (removed_from_runq) {
				thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

				thread_run_queue_reinsert(t, SCHED_TAILQ);
			} else {
				/*
				 * We failed to remove it from the runq - it probably started
				 * running, let the thread reevaluate the next time it gets
				 * enqueued on a runq
				 */
				thread_set_reevaluate_tg_hierarchy_locked(t);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
			}
		} else {
			/*
			 * The thread is not runnable or it is running already - let the
			 * thread reevaluate the next time it gets enqueued on a runq
			 */
			thread_set_reevaluate_tg_hierarchy_locked(t);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
		}
	} else {
		/* We're modifying thread group on ourselves */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(tg),
		    thread_tid(t), thread_group_id(home_tg));
	}

out:
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		/* Deferred path: t->thread_group was deliberately left untouched */
		assert(t->thread_group == old_tg);
		/*
		 * We need to reevaluate TG hierarchy later as a result of this
		 * `thread_set_preadopt_thread_group` operation. This means that the
		 * thread group on the thread was pointing to either the home thread
		 * group, the preadoption thread group we just replaced, or the old
		 * preadoption thread group stashed on the thread.
		 */
		assert(t->thread_group == home_tg ||
		    t->thread_group == old_preadopt_tg ||
		    t->old_preadopt_thread_group);

		if (t->thread_group == old_preadopt_tg) {
			/*
			 * t->thread_group is pointing to the preadopt thread group we just
			 * replaced. This means the hierarchy was resolved before this call.
			 * Assert that there was no old_preadopt_thread_group on the thread.
			 */
			assert(t->old_preadopt_thread_group == NULL);
			/*
			 * Since t->thread_group is still pointing to the old preadopt thread
			 * group - we need to keep it alive until we reevaluate the hierarchy
			 * next
			 */
			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
		} else if (old_preadopt_tg != NULL) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	} else {
		/* We resolved the hierarchy just now */
		thread_assert_has_valid_thread_group(t);

		/*
		 * We don't need the old preadopt thread group that we stashed in our
		 * local variable, drop it.
		 */
		if (old_preadopt_tg) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	}
	thread_unlock(t);
	splx(s);
	return;
}
1036 
1037 #endif
1038 
1039 /*
1040  * thread_set_thread_group()
1041  *
1042  * Caller must guarantee lifetime of the thread group for the life of the call -
1043  * this overrides the thread group without going through the hierarchy
1044  * resolution. This is for special thread groups like the VM and IO thread
1045  * groups only.
1046  */
1047 static void
thread_set_thread_group(thread_t t,struct thread_group * tg)1048 thread_set_thread_group(thread_t t, struct thread_group *tg)
1049 {
1050 	struct thread_group *home_tg = thread_group_get_home_group(t);
1051 	struct thread_group *old_tg = NULL;
1052 
1053 	spl_t s = splsched();
1054 	old_tg = t->thread_group;
1055 
1056 	if (old_tg != tg) {
1057 		thread_lock(t);
1058 
1059 		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1060 		t->thread_group = tg;
1061 
1062 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
1063 		    thread_group_id(old_tg), thread_group_id(tg),
1064 		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
1065 
1066 		thread_notify_thread_group_change_self(t, old_tg, tg);
1067 
1068 		thread_unlock(t);
1069 	}
1070 
1071 	splx(s);
1072 }
1073 
1074 /* Called without the thread lock held, called on current thread */
1075 void
thread_group_set_bank(thread_t t,struct thread_group * tg)1076 thread_group_set_bank(thread_t t, struct thread_group *tg)
1077 {
1078 	assert(current_thread() == t);
1079 	/* boot arg disables groups in bank */
1080 	if (tg_set_by_bankvoucher == FALSE) {
1081 		return;
1082 	}
1083 
1084 	spl_t s = splsched();
1085 	thread_lock(t);
1086 
1087 	/* This is a borrowed reference from the current bank voucher */
1088 	t->bank_thread_group = tg;
1089 
1090 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1091 	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1092 
1093 	thread_unlock(t);
1094 	splx(s);
1095 }
1096 
1097 #if CONFIG_SCHED_AUTO_JOIN
1098 /*
1099  * thread_group_set_autojoin_thread_group_locked()
1100  *
1101  * Sets the thread group of a thread based on auto-join rules and reevaluates
1102  * the hierarchy.
1103  *
1104  * Preconditions:
1105  * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
1106  * - Thread must be locked by the caller already
1107  */
void
thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
{
	/*
	 * Adopt (or clear, when tg == NULL) the auto-join thread group and
	 * re-resolve the hierarchy immediately. Caller holds the thread lock
	 * and guarantees t is not on a run queue.
	 */
	assert(t->runq == PROCESSOR_NULL);

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	t->auto_join_thread_group = tg;

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		struct thread_group *home_tg = thread_group_get_home_group(t);

		t->thread_group = resolved_tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
		/*
		 * If the thread group is being changed for the current thread, callout
		 * to CLPC to update the thread's information at that layer. This makes
		 * sure CLPC has consistent state when the current thread is going
		 * off-core.
		 *
		 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
		 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self)
		 */
		if (t == current_thread()) {
			uint64_t ctime = mach_approximate_time();
			uint64_t arg1, arg2;
			machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
			machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
		}
	}

	/* Hierarchy is resolved whether or not t->thread_group changed above */
	thread_mark_thread_group_hierarchy_resolved(t);
}
1147 #endif
1148 
1149 /* Thread is not locked. Thread is self */
1150 void
thread_set_work_interval_thread_group(thread_t t,struct thread_group * tg)1151 thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
1152 {
1153 	assert(current_thread() == t);
1154 	assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));
1155 
1156 	/*
1157 	 * We have a work interval, we don't need the preadoption thread group
1158 	 * anymore (ie, it shouldn't be available for us to jump back to it after
1159 	 * the thread leaves the work interval)
1160 	 */
1161 	spl_t s = splsched();
1162 	thread_lock(t);
1163 
1164 	t->work_interval_thread_group = tg;
1165 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1166 
1167 	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1168 
1169 	thread_unlock(t);
1170 	splx(s);
1171 }
1172 
1173 inline cluster_type_t
thread_group_recommendation(struct thread_group * tg)1174 thread_group_recommendation(struct thread_group *tg)
1175 {
1176 	if (tg == NULL) {
1177 		return CLUSTER_TYPE_SMP;
1178 	} else {
1179 		return tg->tg_recommendation;
1180 	}
1181 }
1182 
inline uint64_t
thread_group_get_id(struct thread_group *tg)
{
	/* Unique 64-bit identifier assigned to this thread group */
	return tg->tg_id;
}
1188 
uint32_t
thread_group_count(void)
{
	/* Number of thread groups currently in existence (global counter) */
	return tg_count;
}
1194 
1195 /*
1196  * Can only be called while tg cannot be destroyed
1197  */
inline const char*
thread_group_get_name(struct thread_group *tg)
{
	/*
	 * Group name buffer; per the perfcontrol accessor's contract it may be
	 * up to THREAD_GROUP_MAXNAME bytes and is not necessarily NUL-terminated.
	 */
	return tg->tg_name;
}
1203 
inline void *
thread_group_get_machine_data(struct thread_group *tg)
{
	/* Per-group machine (perfcontrol) data region embedded in the group */
	return &tg->tg_machine_data;
}
1209 
inline uint32_t
thread_group_machine_data_size(void)
{
	/* Size, in bytes, of the machine data region in each thread group */
	return tg_machine_data_size;
}
1215 
1216 inline boolean_t
thread_group_uses_immediate_ipi(struct thread_group * tg)1217 thread_group_uses_immediate_ipi(struct thread_group *tg)
1218 {
1219 	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
1220 }
1221 
1222 kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout,void * arg)1223 thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
1224 {
1225 	struct thread_group *tg;
1226 	int i = 0;
1227 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
1228 		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
1229 			return KERN_FAILURE;
1230 		}
1231 		callout(arg, i, tg);
1232 		i++;
1233 	}
1234 	return KERN_SUCCESS;
1235 }
1236 
void
thread_group_join_io_storage(void)
{
	/*
	 * Place the calling thread in the special IO-storage thread group.
	 * The reference taken here is not released in this function; it
	 * satisfies thread_set_thread_group's requirement that the caller
	 * guarantee the group's lifetime.
	 */
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg);
}
1244 
void
thread_group_join_perf_controller(void)
{
	/*
	 * Place the calling thread in the special perf-controller thread group.
	 * The reference taken here is not released in this function; it
	 * satisfies thread_set_thread_group's requirement that the caller
	 * guarantee the group's lifetime.
	 */
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg);
}
1252 
void
thread_group_vm_add(void)
{
	/*
	 * Place the calling thread in the VM thread group. The retained
	 * reference is kept, satisfying thread_set_thread_group's lifetime
	 * requirement.
	 */
	assert(tg_vm != NULL);
	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
}
1259 
uint32_t
thread_group_get_flags(struct thread_group *tg)
{
	/* Raw THREAD_GROUP_FLAGS_* bits for this group */
	return tg->tg_flags;
}
1265 
1266 /*
1267  * Returns whether the thread group is restricted to the E-cluster when CLPC is
1268  * turned off.
1269  */
1270 boolean_t
thread_group_smp_restricted(struct thread_group * tg)1271 thread_group_smp_restricted(struct thread_group *tg)
1272 {
1273 	if (tg->tg_flags & THREAD_GROUP_FLAGS_SMP_RESTRICT) {
1274 		return true;
1275 	} else {
1276 		return false;
1277 	}
1278 }
1279 
void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Publish a new cluster recommendation for the group.
	 *
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
1290 
1291 #if CONFIG_SCHED_EDGE
1292 
1293 int sched_edge_restrict_ut = 1;
1294 int sched_edge_restrict_bg = 1;
1295 
1296 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1297 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1298 {
1299 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1300 	/*
1301 	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
1302 	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
1303 	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
1304 	 */
1305 	assert(new_recommendation != CLUSTER_TYPE_SMP);
1306 	/*
1307 	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until the new CLPC
1308 	 * routine is being called, fake out the call from the old CLPC interface.
1309 	 */
1310 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
1311 	/*
1312 	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
1313 	 */
1314 	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
1315 		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1316 	}
1317 	/* For UT & BG QoS, set the recommendation only if they havent been restricted via sysctls */
1318 	if (!sched_edge_restrict_ut) {
1319 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1320 	}
1321 	if (!sched_edge_restrict_bg) {
1322 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1323 	}
1324 	sched_perfcontrol_preferred_cluster_options_t options = 0;
1325 	if (new_recommendation == CLUSTER_TYPE_P) {
1326 		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
1327 	}
1328 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1329 }
1330 
void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	/* CLPC-facing wrapper: query the Edge scheduler's migration matrix */
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}
1336 
void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	/* CLPC-facing wrapper: update the Edge scheduler's migration matrix */
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}
1342 
1343 void
sched_perfcontrol_thread_group_preferred_clusters_set(void * machine_data,uint32_t tg_preferred_cluster,uint32_t overrides[PERFCONTROL_CLASS_MAX],sched_perfcontrol_preferred_cluster_options_t options)1344 sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
1345     uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
1346 {
1347 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1348 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
1349 		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
1350 		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1351 		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1352 		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
1353 		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
1354 		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
1355 	};
1356 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1357 }
1358 
1359 #else /* CONFIG_SCHED_EDGE */
1360 
1361 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1362 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1363 {
1364 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1365 	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
1366 }
1367 
void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
	/* The edge migration matrix only exists under CONFIG_SCHED_EDGE; no-op */
}
1372 
void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
	/* The edge migration matrix only exists under CONFIG_SCHED_EDGE; no-op */
}
1377 
void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
	/* Per-bucket preferred clusters only exist under CONFIG_SCHED_EDGE; no-op */
}
1383 
1384 #endif /* CONFIG_SCHED_EDGE */
1385 
1386 /*
1387  * Can only be called while tg cannot be destroyed.
1388  * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
1389  */
const char*
sched_perfcontrol_thread_group_get_name(void *machine_data)
{
	/* Map CLPC's machine_data pointer back to its containing thread group */
	struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
	return thread_group_get_name(tg);
}
1396 
1397 #endif /* CONFIG_THREAD_GROUPS */
1398