xref: /xnu-8792.61.2/osfmk/kern/thread_group.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <kern/kern_types.h>
31 #include <kern/processor.h>
32 #include <kern/thread.h>
33 #include <kern/zalloc.h>
34 #include <kern/task.h>
35 #include <kern/machine.h>
36 #include <kern/coalition.h>
37 #include <sys/errno.h>
38 #include <kern/queue.h>
39 #include <kern/locks.h>
40 #include <kern/thread_group.h>
41 #include <kern/sched_clutch.h>
42 
43 #if CONFIG_THREAD_GROUPS
44 
45 #define TG_MACHINE_DATA_ALIGN_SIZE (16)
46 
struct thread_group {
	uint64_t                tg_id;          /* unique id, assigned monotonically under tg_lock */
	char                    tg_name[THREAD_GROUP_MAXNAME]; /* debug name; set via strncpy, so not guaranteed NUL-terminated */
	struct os_refcnt        tg_refcount;    /* lifetime refcount; final release triggers teardown */
	struct {
		uint32_t                tg_flags;          /* THREAD_GROUP_FLAGS_*; protected by tg_flags_update_lock */
		cluster_type_t          tg_recommendation; /* cluster recommendation (CLPC); CLUSTER_TYPE_SMP = none yet */
	};
	/* We make the mpsc destroy chain link a separate field here because while
	 * refs = 0 and the thread group is enqueued on the daemon queue, CLPC
	 * (which does not hold an explicit ref) is still under the assumption that
	 * this thread group is alive and may provide recommendation changes/updates
	 * to it. As such, we need to make sure that all parts of the thread group
	 * structure are valid.
	 */
	struct mpsc_queue_chain tg_destroy_link;
	queue_chain_t           tg_queue_chain; /* linkage on the global tg_queue; protected by tg_lock */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch     tg_sched_clutch; /* clutch scheduler state; lifetime tied to the group */
#endif /* CONFIG_SCHED_CLUTCH */
	/* Trailing per-group machine (CLPC) data, tg_machine_data_size bytes */
	uint8_t                 tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
} __attribute__((aligned(8)));
69 
static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;  /* zone backing all thread_group allocations */
static uint32_t tg_count;                        /* number of live groups; protected by tg_lock */
static queue_head_t tg_queue;                    /* global list of all groups; protected by tg_lock */
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);              /* protects tg_queue, tg_count, tg_next_id */
static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp); /* protects tg_flags updates + CLPC callouts */

static uint64_t tg_next_id = 0;        /* next tg_id to hand out; under tg_lock */
static uint32_t tg_size;               /* total allocation size: header + machine data, rounded up */
static uint32_t tg_machine_data_size;  /* bytes of trailing machine (CLPC) data per group */
/* Boot-arg "kern.perf_tg_no_dipi"; consumer of this value is not in this file */
static uint32_t perf_controller_thread_group_immediate_ipi;
/* Well-known groups created once at boot; never destroyed */
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
int tg_set_by_bankvoucher;             /* boot-arg: allow group adoption via bank vouchers */

static bool thread_group_retain_try(struct thread_group *tg);

/* Deferred-free queue for releases from contexts that cannot block */
static struct mpsc_daemon_queue thread_group_deallocate_queue;
static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq);
93 
/*
 * Initialize thread groups at boot.
 *
 * Reads boot-args/EDT tunables, sizes and creates the backing zone, then
 * creates the well-known groups (system, background, VM, io storage,
 * perf_controller) and the deferred-deallocation daemon queue.
 */
void
thread_group_init(void)
{
	// Get thread group structure extension from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
			tg_machine_data_size = 8;
		}
	}

	if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
		if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
			perf_controller_thread_group_immediate_ipi = 0;
		}
	}

	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
			tg_set_by_bankvoucher = 1;
		}
	}

	// Round the allocation size up so the trailing machine data keeps its 16-byte alignment
	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
	if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
		tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
	}
	tg_machine_data_size = tg_size - sizeof(struct thread_group);
	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
	assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
	tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);

	// Well-known groups get the first ids: system=0, background=1, VM=3, io storage=4, perf_controller=5
	queue_head_init(tg_queue);
	tg_system = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_system, "system");
	tg_background = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_background, "background");
	lck_mtx_lock(&tg_lock);
	tg_next_id++;  // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
	lck_mtx_unlock(&tg_lock);
	tg_vm = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_vm, "VM");
	tg_io_storage = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_io_storage, "io storage");
	tg_perf_controller = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_perf_controller, "perf_controller");

	/*
	 * The thread group deallocation queue must be a thread call based queue
	 * because it is woken up from contexts where the thread lock is held. The
	 * only way to perform wakeups safely in those contexts is to wakeup a
	 * thread call which is guaranteed to be on a different waitq and would
	 * not hash onto the same global waitq which might be currently locked.
	 */
	mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
	    thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL,
	    MPSC_DAEMON_INIT_NONE);
}
155 
156 #if CONFIG_SCHED_CLUTCH
/*
 * sched_clutch_for_thread
 *
 * The routine provides a back linkage from the thread to the
 * sched_clutch it belongs to. This relationship is based on the
 * thread group membership of the thread. Since that membership is
 * changed from the thread context with the thread lock held, this
 * linkage should be looked at only with the thread lock held or
 * when the thread cannot be running (for eg. the thread is in the
 * runq and being removed as part of thread_select().
 */
sched_clutch_t
sched_clutch_for_thread(thread_t thread)
{
	/* Every thread always has a group; see thread_group_init_thread() */
	assert(thread->thread_group != NULL);
	return &(thread->thread_group->tg_sched_clutch);
}
174 
/* Return the sched_clutch embedded in the given thread group. */
sched_clutch_t
sched_clutch_for_thread_group(struct thread_group *thread_group)
{
	return &(thread_group->tg_sched_clutch);
}
180 
181 /*
182  * Translate the TG flags to a priority boost for the sched_clutch.
183  * This priority boost will apply to the entire clutch represented
184  * by the thread group.
185  */
186 static void
sched_clutch_update_tg_flags(__unused sched_clutch_t clutch,__unused uint32_t flags)187 sched_clutch_update_tg_flags(__unused sched_clutch_t clutch, __unused uint32_t flags)
188 {
189 	sched_clutch_tg_priority_t sc_tg_pri = 0;
190 	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
191 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
192 	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
193 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
194 	} else {
195 		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
196 	}
197 	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
198 }
199 
200 #endif /* CONFIG_SCHED_CLUTCH */
201 
202 uint64_t
thread_group_id(struct thread_group * tg)203 thread_group_id(struct thread_group *tg)
204 {
205 	return (tg == NULL) ? 0 : tg->tg_id;
206 }
207 
#if CONFIG_PREADOPT_TG
/*
 * Accessors for the "re-evaluate the thread group hierarchy later"
 * scheduler flag. Callers hold the thread lock.
 */
static inline bool
thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
{
	return (t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER) != 0;
}

static inline void
thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags = t->sched_flags | TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}

static inline void
thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags = t->sched_flags & ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
#endif
227 
/*
 * Use a mutex (tg_flags_update_lock) to protect all thread group flag updates.
 * The lock should not have heavy contention since these flag updates should
 * be infrequent. If this lock has contention issues, it should be changed to
 * a per thread-group lock.
 *
 * The lock protects the flags field in the thread_group structure. It is also
 * held while doing callouts to CLPC to reflect these flag changes.
 */
237 
/* Acquire the global flags-update lock (see comment above). */
void
thread_group_flags_update_lock(void)
{
	lck_mtx_lock(&tg_flags_update_lock);
}
243 
/* Release the global flags-update lock. */
void
thread_group_flags_update_unlock(void)
{
	lck_mtx_unlock(&tg_flags_update_lock);
}
249 
250 /*
251  * Inform platform code about already existing thread groups
252  * or ask it to free state for all thread groups
253  */
254 void
thread_group_resync(boolean_t create)255 thread_group_resync(boolean_t create)
256 {
257 	struct thread_group *tg;
258 
259 	thread_group_flags_update_lock();
260 	lck_mtx_lock(&tg_lock);
261 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
262 		if (create) {
263 			machine_thread_group_init(tg);
264 		} else {
265 			machine_thread_group_deinit(tg);
266 		}
267 	}
268 	lck_mtx_unlock(&tg_lock);
269 	thread_group_flags_update_unlock();
270 }
271 
/*
 * Create new thread group and add new reference to it.
 * The caller owns the single returned reference.
 */
struct thread_group *
thread_group_create_and_retain(uint32_t flags)
{
	struct thread_group *tg;

	/* Z_NOFAIL: allocation cannot fail, so no NULL check is needed */
	tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);

	tg->tg_flags = flags;

#if CONFIG_SCHED_CLUTCH
	/*
	 * The clutch scheduler maintains a bunch of runqs per thread group. For
	 * each thread group it maintains a sched_clutch structure. The lifetime
	 * of that structure is tied directly to the lifetime of the thread group.
	 */
	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);

	/*
	 * Since the thread group flags are used to determine any priority promotions
	 * for the threads in the thread group, initialize them now.
	 */
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);

#endif /* CONFIG_SCHED_CLUTCH */

	/* id assignment, refcount init and list insertion all happen under tg_lock */
	lck_mtx_lock(&tg_lock);
	tg->tg_id = tg_next_id++;
	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
	os_ref_init(&tg->tg_refcount, NULL);
	tg_count++;
	enqueue_tail(&tg_queue, &tg->tg_queue_chain);

	// call machine layer init before this thread group becomes visible
	machine_thread_group_init(tg);
	lck_mtx_unlock(&tg_lock);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));
	if (flags) {
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS), thread_group_id(tg), thread_group_get_flags(tg), 0);
	}

	return tg;
}
319 
/*
 * Point newly created thread to its home thread group (the group of the
 * task's coalition). No reference is taken here; presumably the coalition
 * keeps the home group alive — the retain is not visible in this file.
 */
void
thread_group_init_thread(thread_t t, task_t task)
{
	struct thread_group *tg = task_coalition_get_thread_group(task);
	t->thread_group = tg;
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
}
331 
/*
 * Set thread group name.
 *
 * Copies up to THREAD_GROUP_MAXNAME bytes into tg_name and logs the name
 * prefix via kdebug as three machine words.
 * NOTE(review): strncpy does not NUL-terminate when strlen(name) >=
 * THREAD_GROUP_MAXNAME. Readers in this file use bounded accesses
 * (strncmp / fixed-width loads), but tg_name must not be treated as a
 * C string elsewhere — verify before relying on termination.
 */
void
thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
{
	if (name == NULL) {
		return;
	}
	/* Bail if the group has already dropped to zero refs (being torn down) */
	if (!thread_group_retain_try(tg)) {
		return;
	}
	if (name[0] != '\0') {
		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
#if defined(__LP64__)
		/* Emit the first 24 name bytes as three 64-bit words */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
		    );
#else /* defined(__LP64__) */
		/* 32-bit: emit the first 12 name bytes as three 32-bit words */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
		    );
#endif /* defined(__LP64__) */
	}
	thread_group_release(tg);
}
364 
/*
 * Set thread group flags under the flags-update lock; see
 * thread_group_set_flags_locked() for semantics and allowed flags.
 */
void
thread_group_set_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
372 
373 /*
374  * Return true if flags are valid, false otherwise.
375  * Some flags are mutually exclusive.
376  */
377 boolean_t
thread_group_valid_flags(uint32_t flags)378 thread_group_valid_flags(uint32_t flags)
379 {
380 	const uint32_t sflags = flags & ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
381 	const uint32_t eflags = flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
382 
383 	if ((sflags & THREAD_GROUP_FLAGS_SHARED) != sflags) {
384 		return false;
385 	}
386 
387 	if ((eflags & THREAD_GROUP_FLAGS_EXCLUSIVE) != eflags) {
388 		return false;
389 	}
390 
391 	/* Only one of the exclusive flags may be set. */
392 	if (((eflags - 1) & eflags) != 0) {
393 		return false;
394 	}
395 
396 	return true;
397 }
398 
/*
 * Clear thread group flags under the flags-update lock; see
 * thread_group_clear_flags_locked() for semantics and allowed flags.
 */
void
thread_group_clear_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}
406 
/*
 * Set thread group flags and perform related actions.
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * Exclusive Flags:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_APPLICATION
 * - THREAD_GROUP_FLAGS_CRITICAL
 * Shared Flags:
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_set_flags_locked(struct thread_group *tg, uint32_t flags)
{
	if (!thread_group_valid_flags(flags)) {
		panic("thread_group_set_flags: Invalid flags %u", flags);
	}

	/* Disallow any exclusive flags from being set after creation, with the
	 * exception of moving from default to application */
	if ((flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) &&
	    !((flags & THREAD_GROUP_FLAGS_APPLICATION) &&
	    (tg->tg_flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) ==
	    THREAD_GROUP_FLAGS_DEFAULT)) {
		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
	}
	/* No-op if every requested flag is already set */
	if ((tg->tg_flags & flags) == flags) {
		return;
	}

	if (tg == tg_system) {
		/*
		 * The system TG is used for kernel and launchd. It is also used
		 * for processes which are getting spawned and do not have a home
		 * TG yet (see task_coalition_get_thread_group()). Make sure the
		 * policies for those processes do not update the flags for the
		 * system TG. The flags for this thread group should only be set
		 * at creation via thread_group_create_and_retain().
		 */
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags |= flags;

	/* Propagate the new flags to CLPC and the clutch scheduler */
	machine_thread_group_flags_update(tg, tg->tg_flags);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}
460 
/*
 * Clear thread group flags and perform related actions
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * Exclusive Flags:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_APPLICATION
 * - THREAD_GROUP_FLAGS_CRITICAL
 * Shared Flags:
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_clear_flags_locked(struct thread_group *tg, uint32_t flags)
{
	if (!thread_group_valid_flags(flags)) {
		panic("thread_group_clear_flags: Invalid flags %u", flags);
	}

	/* Disallow any exclusive flags from being cleared */
	if (flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) {
		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
	}
	/* No-op if none of the requested flags are currently set */
	if ((tg->tg_flags & flags) == 0) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags &= ~flags;
	/* Propagate the new flags to the clutch scheduler and CLPC */
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}
497 
498 
499 
500 /*
501  * Find thread group with specified name and put new reference to it.
502  */
503 struct thread_group *
thread_group_find_by_name_and_retain(char * name)504 thread_group_find_by_name_and_retain(char *name)
505 {
506 	struct thread_group *result = NULL;
507 
508 	if (name == NULL) {
509 		return NULL;
510 	}
511 
512 	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
513 		return thread_group_retain(tg_system);
514 	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
515 		return thread_group_retain(tg_background);
516 	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
517 		return thread_group_retain(tg_perf_controller);
518 	}
519 
520 	struct thread_group *tg;
521 	lck_mtx_lock(&tg_lock);
522 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
523 		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
524 		    thread_group_retain_try(tg)) {
525 			result = tg;
526 			break;
527 		}
528 	}
529 	lck_mtx_unlock(&tg_lock);
530 	return result;
531 }
532 
533 /*
534  * Find thread group with specified ID and add new reference to it.
535  */
536 struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)537 thread_group_find_by_id_and_retain(uint64_t id)
538 {
539 	struct thread_group *tg = NULL;
540 	struct thread_group *result = NULL;
541 
542 	switch (id) {
543 	case THREAD_GROUP_SYSTEM:
544 		result = tg_system;
545 		thread_group_retain(tg_system);
546 		break;
547 	case THREAD_GROUP_BACKGROUND:
548 		result = tg_background;
549 		thread_group_retain(tg_background);
550 		break;
551 	case THREAD_GROUP_VM:
552 		result = tg_vm;
553 		thread_group_retain(tg_vm);
554 		break;
555 	case THREAD_GROUP_IO_STORAGE:
556 		result = tg_io_storage;
557 		thread_group_retain(tg_io_storage);
558 		break;
559 	case THREAD_GROUP_PERF_CONTROLLER:
560 		result = tg_perf_controller;
561 		thread_group_retain(tg_perf_controller);
562 		break;
563 	default:
564 		lck_mtx_lock(&tg_lock);
565 		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
566 			if (tg->tg_id == id && thread_group_retain_try(tg)) {
567 				result = tg;
568 				break;
569 			}
570 		}
571 		lck_mtx_unlock(&tg_lock);
572 	}
573 	return result;
574 }
575 
/*
 * Add new reference to specified thread group.
 * Returns the same pointer for call-chaining convenience.
 */
struct thread_group *
thread_group_retain(struct thread_group *tg)
{
	os_ref_retain(&tg->tg_refcount);
	return tg;
}
585 
/*
 * Similar to thread_group_retain, but fails for thread groups with a
 * zero reference count. Returns true if retained successfully.
 * Used when scanning tg_queue, where a group may already be dying.
 */
static bool
thread_group_retain_try(struct thread_group *tg)
{
	return os_ref_retain_try(&tg->tg_refcount);
}
595 
/*
 * Final teardown of a group whose refcount has reached zero: unlink it
 * from the global list, log the name for postmortem tooling, release
 * machine (CLPC) and clutch state, then free the allocation.
 */
static void
thread_group_deallocate_complete(struct thread_group *tg)
{
	/* Unlink before tearing down any state */
	lck_mtx_lock(&tg_lock);
	tg_count--;
	remqueue(&tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);
	/* The KDBG emission below reads 3 fixed-width words from tg_name */
	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint64_t*)(void*)&tg->tg_name[0],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
	    );
#else /* defined(__LP64__) */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint32_t*)(void*)&tg->tg_name[0],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
	    );
#endif /* defined(__LP64__) */
	machine_thread_group_deinit(tg);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
	zfree(tg_zone, tg);
}
627 
/*
 * Drop a reference to specified thread group.
 * May perform full teardown (which takes tg_lock, a mutex) when this was
 * the last reference; use thread_group_deallocate_safe() from contexts
 * that cannot block.
 */
void
thread_group_release(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		thread_group_deallocate_complete(tg);
	}
}
638 
/*
 * Drop a reference that is known not to be the last one
 * (os_ref_release_live enforces this).
 */
void
thread_group_release_live(struct thread_group *tg)
{
	os_ref_release_live(&tg->tg_refcount);
}
644 
/*
 * Daemon-queue callback: finish tearing down a group that was released
 * via thread_group_deallocate_safe().
 */
static void
thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &thread_group_deallocate_queue);
	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);

	thread_group_deallocate_complete(tg);
}
653 
/*
 * Drop a reference from a context that cannot block (e.g. with the
 * thread lock held): when the count hits zero, teardown is deferred
 * to the thread-call-backed daemon queue instead of running inline.
 */
void
thread_group_deallocate_safe(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
		    MPSC_QUEUE_NONE);
	}
}
662 
/*
 * Get thread's current thread group. No reference is taken.
 */
inline struct thread_group *
thread_group_get(thread_t t)
{
	return t->thread_group;
}
671 
/*
 * Return the thread's home group: the group of its task's coalition.
 * No reference is taken.
 */
struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(get_threadtask(t));
}
677 
/*
 * The thread group is resolved according to a hierarchy:
 *
 * 1) work interval specified group (explicit API)
 * 2) Auto-join thread group (wakeup tracking for special work intervals)
 * 3) bank voucher carried group (implicitly set)
 * 4) Preadopt thread group (if any)
 * 5) coalition default thread group (ambient)
 *
 * Returns true if the thread's thread group needs to be changed and resolving
 * TG is passed through in-out param. See also
 * thread_mark_thread_group_hierarchy_resolved and
 * thread_set_resolved_thread_group
 *
 * Caller should have thread lock. Interrupts are disabled. Thread doesn't have
 * to be self
 */
static bool
thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
{
	struct thread_group *cur_tg, *tg;
	cur_tg = t->thread_group;

	/* Start with the lowest-priority source (5) and override upwards */
	tg = thread_group_get_home_group(t);

#if CONFIG_PREADOPT_TG
	/* (4) preadopted group */
	if (t->preadopt_thread_group) {
		tg = t->preadopt_thread_group;
	}
#endif
	/* (3) group carried by an adopted bank voucher */
	if (t->bank_thread_group) {
		tg = t->bank_thread_group;
	}

	/*
	 * (2)/(1): while auto-joined, the auto-join group wins; otherwise the
	 * explicitly joined work interval group does.
	 */
	if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
		if (t->auto_join_thread_group) {
			tg = t->auto_join_thread_group;
		}
	} else {
		if (t->work_interval_thread_group) {
			tg = t->work_interval_thread_group;
		}
	}

	*resolved_tg = tg;
	/* A change is needed only if resolution differs from the current group */
	return tg != cur_tg;
}
725 
#if CONFIG_PREADOPT_TG

/*
 * This function is always called after the hierarchy has been resolved. The
 * caller holds the thread lock
 */
static inline void
thread_assert_has_valid_thread_group(thread_t t)
{
	__assert_only struct thread_group *home_tg = thread_group_get_home_group(t);

	/* No deferred re-evaluation may be pending once resolved */
	assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);

	/* Re-resolving must be a no-op: the current group is already correct */
	__assert_only struct thread_group *resolved_tg;
	assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);

	/* The current group must come from one of the hierarchy's sources */
	assert((t->thread_group == home_tg) ||
	    (t->thread_group == t->preadopt_thread_group) ||
	    (t->thread_group == t->bank_thread_group) ||
	    (t->thread_group == t->auto_join_thread_group) ||
	    (t->thread_group == t->work_interval_thread_group));
}
#endif
749 
/*
 * This function is called when the thread group hierarchy on the thread_t is
 * resolved and t->thread_group is the result of the hierarchy resolution. Once
 * this has happened, there is state that needs to be cleared up which is
 * handled by this function.
 *
 * Prior to this call, we should have either
 * a) Resolved the hierarchy and discovered no change needed
 * b) Resolved the hierarchy and modified the t->thread_group
 */
static void
thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
{
#if CONFIG_PREADOPT_TG
	/*
	 * We have just reevaluated the thread's hierarchy so we don't need to do it
	 * again later.
	 */
	thread_clear_reevaluate_tg_hierarchy_locked(t);

	/*
	 * Clear the old_preadopt_thread_group field whose sole purpose was to make
	 * sure that t->thread_group didn't have a dangling pointer.
	 */
	thread_assert_has_valid_thread_group(t);

	if (t->old_preadopt_thread_group) {
		/* _safe variant: we may be in a context holding the thread lock */
		thread_group_deallocate_safe(t->old_preadopt_thread_group);
		t->old_preadopt_thread_group = NULL;
	}
#endif
}
782 
/*
 * Called with thread lock held, always called on self.  This function simply
 * moves the thread to the right clutch scheduler bucket and informs CLPC of the
 * change
 */
static void
thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
    struct thread_group * __unused new_tg)
{
	assert(current_thread() == t);
	assert(old_tg != new_tg);
	assert(t->thread_group == new_tg);

	/* Tell CLPC the running thread is effectively "going on core" under its new group */
	uint64_t ctime = mach_approximate_time();
	uint64_t arg1, arg2;
	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
}
801 
/*
 * Called on any thread with thread lock. Updates the thread_group field on the
 * thread with the resolved thread group and always make necessary clutch
 * scheduler callouts. If the thread group is being modified on self,
 * then also make necessary CLPC callouts.
 */
static void
thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
    struct thread_group *resolved_tg, bool on_self)
{
	t->thread_group = resolved_tg;

	/* Thread is either running already or is runnable but not on a runqueue */
	assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
	assert(t->runq == PROCESSOR_NULL);

	struct thread_group *home_tg = thread_group_get_home_group(t);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    thread_group_id(old_tg), thread_group_id(resolved_tg),
	    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

#if CONFIG_PREADOPT_TG
	/* Extra event when the resolution picked the preadopted group */
	if (resolved_tg == t->preadopt_thread_group) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
	}
#endif

#if CONFIG_SCHED_CLUTCH
	/* Move the thread between the old and new groups' clutches */
	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
	sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
	}
#endif

	if (on_self) {
		assert(t == current_thread());
		/* CLPC callouts are only made when changing the current thread */
		thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
	}

	thread_mark_thread_group_hierarchy_resolved(t);
}
846 
/*
 * Caller has thread lock. Always called on self.
 * Optionally clears any preadopted group, then re-resolves the hierarchy
 * and applies the result (with CLPC callouts, since this is self).
 */
static void
thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
{
	assert(current_thread() == t);

#if CONFIG_PREADOPT_TG
	struct thread_group *preadopt_tg = NULL;
	if (clear_preadopt) {
		if (t->preadopt_thread_group) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);

			/* Detach now; the reference is dropped after resolution below */
			preadopt_tg = t->preadopt_thread_group;
			t->preadopt_thread_group = NULL;
		}
	}
#endif

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		/* on_self = true: also make the CLPC callouts */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
	}

	/*
	 * Regardless of whether we modified the t->thread_group above or not, the
	 * hierarchy is now resolved
	 */
	thread_mark_thread_group_hierarchy_resolved(t);

#if CONFIG_PREADOPT_TG
	if (preadopt_tg) {
		/* _safe variant: the thread lock is held here */
		thread_group_deallocate_safe(preadopt_tg);
	}
#endif
}
886 
/*
 * Caller has thread lock, never called on self, always called on a thread not
 * on a runqueue. This is called from sched_prim.c. Counter part for calling on
 * self is thread_resolve_thread_group_hierarchy_self
 */
#if CONFIG_PREADOPT_TG
void
thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
{
	assert(t != current_thread());
	assert(t->runq == NULL);

	/*
	 * Only act if a previous preadoption-group change deferred the
	 * re-resolution (see thread_set_preadopt_thread_group).
	 */
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		struct thread_group *resolved_tg = NULL;

		bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
		if (needs_change) {
			struct thread_group *old_tg = t->thread_group;
			/* on_self == false: no CLPC self-notification from here */
			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
		}

		/*
		 * Regardless of whether we modified the t->thread_group above or not,
		 * the hierarchy is now resolved
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
	}
}
#endif
916 
#if CONFIG_PREADOPT_TG
/*
 * The thread being passed can be the current thread and it can also be another
 * thread which is running on another core. This function is called with spin
 * locks held (kq and wq lock) but the thread lock is not held by caller.
 *
 * The thread always takes a +1 on the thread group and will release the
 * previous preadoption thread group's reference or stash it.
 */
void
thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
{
	spl_t s = splsched();
	thread_lock(t);

	/*
	 * Assert that this is never called on WindowServer when it has already
	 * issued a block callout to CLPC.
	 *
	 * This should never happen because we don't ever call
	 * thread_set_preadopt_thread_group on a servicer after going out to
	 * userspace unless we are doing so to/after an unbind
	 */
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	struct thread_group *old_tg = t->thread_group;
	struct thread_group *home_tg = thread_group_get_home_group(t);

	/*
	 * Since the preadoption thread group can disappear from under you, we need
	 * to make sure that the thread_group pointer is always pointing to valid
	 * memory.
	 *
	 * We run the risk of the thread group pointer pointing to dangling memory
	 * when the following happens:
	 *
	 * a) We update the preadopt_thread_group
	 * b) We resolve hierarchy and need to change the resolved_thread_group
	 * c) For some reason, we are not able to do so and we need to set the
	 * resolved thread group later.
	 */

	/* take the ref from the thread */
	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;

	if (tg == NULL) {
		/* Clearing: leave the old +1 in old_preadopt_tg for disposal below */
		t->preadopt_thread_group = NULL;
		if (old_preadopt_tg != NULL) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
		}
	} else {
		/* The thread holds a +1 on the new preadoption group */
		t->preadopt_thread_group = thread_group_retain(tg);
	}

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
	if (!needs_change) {
		/*
		 * Setting preadoption thread group didn't change anything, simply mark
		 * the hierarchy as resolved and exit.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
		goto out;
	}

	if (t != current_thread()) {
		/*
		 * We're modifying the thread group of another thread, we need to take
		 * action according to the state of the other thread.
		 *
		 * If the thread is runnable and not yet running, try removing it from
		 * the runq, modify it's TG and then reinsert it for reevaluation. If it
		 * isn't runnable (already running or started running concurrently, or
		 * if it is waiting), then mark a bit having the thread reevaluate its
		 * own hierarchy the next time it is being inserted into a runq
		 */
		if ((t->state & TH_RUN) && (t->runq != PROCESSOR_NULL)) {
			/* Thread is runnable but not running */

			bool removed_from_runq = thread_run_queue_remove(t);
			if (removed_from_runq) {
				thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

				thread_run_queue_reinsert(t, SCHED_TAILQ);
			} else {
				/*
				 * We failed to remove it from the runq - it probably started
				 * running, let the thread reevaluate the next time it gets
				 * enqueued on a runq
				 */
				thread_set_reevaluate_tg_hierarchy_locked(t);

				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
				    thread_group_id(old_tg), thread_group_id(tg),
				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
			}
		} else {
			/*
			 * The thread is not runnable or it is running already - let the
			 * thread reevaluate the next time it gets enqueued on a runq
			 */
			thread_set_reevaluate_tg_hierarchy_locked(t);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
		}
	} else {
		/* We're modifying thread group on ourselves */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(tg),
		    thread_tid(t), thread_group_id(home_tg));
	}

out:
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		assert(t->thread_group == old_tg);
		/*
		 * We need to reevaluate TG hierarchy later as a result of this
		 * `thread_set_preadopt_thread_group` operation. This means that the
		 * thread group on the thread was pointing to either the home thread
		 * group, the preadoption thread group we just replaced, or the old
		 * preadoption thread group stashed on the thread.
		 */
		assert(t->thread_group == home_tg ||
		    t->thread_group == old_preadopt_tg ||
		    t->old_preadopt_thread_group);

		if (t->thread_group == old_preadopt_tg) {
			/*
			 * t->thread_group is pointing to the preadopt thread group we just
			 * replaced. This means the hierarchy was resolved before this call.
			 * Assert that there was no old_preadopt_thread_group on the thread.
			 */
			assert(t->old_preadopt_thread_group == NULL);
			/*
			 * Since t->thread_group is still pointing to the old preadopt thread
			 * group - we need to keep it alive until we reevaluate the hierarchy
			 * next
			 */
			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
		} else if (old_preadopt_tg != NULL) {
			/* Nothing points at the old group anymore; drop its +1 */
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	} else {
		/* We resolved the hierarchy just now */
		thread_assert_has_valid_thread_group(t);

		/*
		 * We don't need the old preadopt thread group that we stashed in our
		 * local variable, drop it.
		 */
		if (old_preadopt_tg) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	}
	thread_unlock(t);
	splx(s);
	return;
}

#endif
1086 
/*
 * thread_set_thread_group()
 *
 * Caller must guarantee lifetime of the thread group for the life of the call -
 * this overrides the thread group without going through the hierarchy
 * resolution. This is for special thread groups like the VM and IO thread
 * groups only.
 */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	spl_t s = splsched();
	/*
	 * NOTE(review): old_tg is sampled at splsched but before taking the
	 * thread lock; visible callers all pass current_thread(), which
	 * presumably makes the unlocked read safe — confirm for new callers.
	 */
	old_tg = t->thread_group;

	if (old_tg != tg) {
		thread_lock(t);

		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
		t->thread_group = tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(tg),
		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

		/* Tell CLPC about the group change on this thread */
		thread_notify_thread_group_change_self(t, old_tg, tg);

		thread_unlock(t);
	}

	splx(s);
}
1121 
/*
 * Called without the thread lock held, called on current thread.
 *
 * Installs (or clears, when tg == NULL) the bank-voucher thread group on the
 * calling thread and re-resolves the thread group hierarchy.
 */
void
thread_group_set_bank(thread_t t, struct thread_group *tg)
{
	assert(current_thread() == t);
	/* boot arg disables groups in bank */
	if (tg_set_by_bankvoucher == FALSE) {
		return;
	}

	spl_t s = splsched();
	thread_lock(t);

	/* This is a borrowed reference from the current bank voucher */
	t->bank_thread_group = tg;

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	/* Adopting a bank group (tg != NULL) also clears any preadoption group */
	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}
1144 
#if CONFIG_SCHED_AUTO_JOIN
/*
 * thread_group_set_autojoin_thread_group_locked()
 *
 * Sets the thread group of a thread based on auto-join rules and reevaluates
 * the hierarchy.
 *
 * Preconditions:
 * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
 * - Thread must be locked by the caller already
 */
void
thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
{
	assert(t->runq == PROCESSOR_NULL);

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	t->auto_join_thread_group = tg;

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		struct thread_group *home_tg = thread_group_get_home_group(t);

		t->thread_group = resolved_tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
		/*
		 * If the thread group is being changed for the current thread, callout
		 * to CLPC to update the thread's information at that layer. This makes
		 * sure CLPC has consistent state when the current thread is going
		 * off-core.
		 *
		 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
		 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self)
		 */
		if (t == current_thread()) {
			uint64_t ctime = mach_approximate_time();
			uint64_t arg1, arg2;
			machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
			machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
		}
	}

	/* Hierarchy is resolved whether or not the group actually changed */
	thread_mark_thread_group_hierarchy_resolved(t);
}
#endif
1196 
/*
 * Thread is not locked. Thread is self.
 *
 * Installs (or clears, when tg == NULL) the work-interval thread group on the
 * calling thread and re-resolves the hierarchy.
 */
void
thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
{
	assert(current_thread() == t);
	assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));

	/*
	 * We have a work interval, we don't need the preadoption thread group
	 * anymore (ie, it shouldn't be available for us to jump back to it after
	 * the thread leaves the work interval)
	 */
	spl_t s = splsched();
	thread_lock(t);

	t->work_interval_thread_group = tg;
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	/* Joining a work interval (tg != NULL) also clears the preadoption group */
	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}
1220 
1221 inline cluster_type_t
thread_group_recommendation(struct thread_group * tg)1222 thread_group_recommendation(struct thread_group *tg)
1223 {
1224 	if (tg == NULL) {
1225 		return CLUSTER_TYPE_SMP;
1226 	} else {
1227 		return tg->tg_recommendation;
1228 	}
1229 }
1230 
1231 inline uint64_t
thread_group_get_id(struct thread_group * tg)1232 thread_group_get_id(struct thread_group *tg)
1233 {
1234 	return tg->tg_id;
1235 }
1236 
1237 uint32_t
thread_group_count(void)1238 thread_group_count(void)
1239 {
1240 	return tg_count;
1241 }
1242 
1243 /*
1244  * Can only be called while tg cannot be destroyed
1245  */
1246 inline const char*
thread_group_get_name(struct thread_group * tg)1247 thread_group_get_name(struct thread_group *tg)
1248 {
1249 	return tg->tg_name;
1250 }
1251 
1252 inline void *
thread_group_get_machine_data(struct thread_group * tg)1253 thread_group_get_machine_data(struct thread_group *tg)
1254 {
1255 	return &tg->tg_machine_data;
1256 }
1257 
1258 inline uint32_t
thread_group_machine_data_size(void)1259 thread_group_machine_data_size(void)
1260 {
1261 	return tg_machine_data_size;
1262 }
1263 
1264 inline boolean_t
thread_group_uses_immediate_ipi(struct thread_group * tg)1265 thread_group_uses_immediate_ipi(struct thread_group *tg)
1266 {
1267 	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
1268 }
1269 
1270 kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout,void * arg)1271 thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
1272 {
1273 	struct thread_group *tg;
1274 	int i = 0;
1275 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
1276 		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
1277 			return KERN_FAILURE;
1278 		}
1279 		callout(arg, i, tg);
1280 		i++;
1281 	}
1282 	return KERN_SUCCESS;
1283 }
1284 
1285 void
thread_group_join_io_storage(void)1286 thread_group_join_io_storage(void)
1287 {
1288 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
1289 	assert(tg != NULL);
1290 	thread_set_thread_group(current_thread(), tg);
1291 }
1292 
1293 void
thread_group_join_perf_controller(void)1294 thread_group_join_perf_controller(void)
1295 {
1296 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
1297 	assert(tg != NULL);
1298 	thread_set_thread_group(current_thread(), tg);
1299 }
1300 
1301 void
thread_group_vm_add(void)1302 thread_group_vm_add(void)
1303 {
1304 	assert(tg_vm != NULL);
1305 	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
1306 }
1307 
1308 uint32_t
thread_group_get_flags(struct thread_group * tg)1309 thread_group_get_flags(struct thread_group *tg)
1310 {
1311 	return tg->tg_flags;
1312 }
1313 
/* Publish a new cluster-type recommendation for a thread group. */
void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
1324 
1325 #if CONFIG_SCHED_EDGE
1326 
/*
 * Edge-scheduler tunables: when non-zero (the default), the UT and BG QoS
 * buckets keep a preferred cluster of 0 instead of following the CLPC
 * recommendation (see sched_perfcontrol_thread_group_recommend).
 */
int sched_edge_restrict_ut = 1;
int sched_edge_restrict_bg = 1;
1329 
1330 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1331 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1332 {
1333 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1334 	/*
1335 	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
1336 	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
1337 	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
1338 	 */
1339 	assert(new_recommendation != CLUSTER_TYPE_SMP);
1340 	/*
1341 	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until the new CLPC
1342 	 * routine is being called, fake out the call from the old CLPC interface.
1343 	 */
1344 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
1345 	/*
1346 	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
1347 	 */
1348 	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
1349 		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1350 	}
1351 	/* For UT & BG QoS, set the recommendation only if they havent been restricted via sysctls */
1352 	if (!sched_edge_restrict_ut) {
1353 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1354 	}
1355 	if (!sched_edge_restrict_bg) {
1356 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1357 	}
1358 	sched_perfcontrol_preferred_cluster_options_t options = 0;
1359 	if (new_recommendation == CLUSTER_TYPE_P) {
1360 		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
1361 	}
1362 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1363 }
1364 
/* CLPC entry point: pass-through to the Edge scheduler's edge-matrix reader. */
void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}
1370 
/* CLPC entry point: pass-through to the Edge scheduler's edge-matrix writer. */
void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}
1376 
/*
 * CLPC entry point: set per-QoS-bucket preferred clusters for a thread group.
 *
 * Each scheduler bucket takes its perfcontrol-class override when one is
 * provided (i.e. not SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE),
 * otherwise the group-wide tg_preferred_cluster.
 */
void
sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
    uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
{
	/* Recover the owning thread group from its embedded machine data */
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
	};
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}
1392 
1393 #else /* CONFIG_SCHED_EDGE */
1394 
1395 void
sched_perfcontrol_thread_group_recommend(__unused void * machine_data,__unused cluster_type_t new_recommendation)1396 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1397 {
1398 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1399 	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
1400 }
1401 
/* Edge matrices only exist under CONFIG_SCHED_EDGE; no-op on this config. */
void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}
1406 
/* Edge matrices only exist under CONFIG_SCHED_EDGE; no-op on this config. */
void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}
1411 
/* Per-bucket preferred clusters only exist under CONFIG_SCHED_EDGE; no-op. */
void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}
1417 
1418 #endif /* CONFIG_SCHED_EDGE */
1419 
1420 /*
1421  * Can only be called while tg cannot be destroyed.
1422  * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
1423  */
1424 const char*
sched_perfcontrol_thread_group_get_name(void * machine_data)1425 sched_perfcontrol_thread_group_get_name(void *machine_data)
1426 {
1427 	struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
1428 	return thread_group_get_name(tg);
1429 }
1430 
1431 #endif /* CONFIG_THREAD_GROUPS */
1432