xref: /xnu-8796.101.5/osfmk/kern/thread_group.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <kern/kern_types.h>
31 #include <kern/processor.h>
32 #include <kern/thread.h>
33 #include <kern/zalloc.h>
34 #include <kern/task.h>
35 #include <kern/machine.h>
36 #include <kern/coalition.h>
37 #include <sys/errno.h>
38 #include <kern/queue.h>
39 #include <kern/locks.h>
40 #include <kern/thread_group.h>
41 #include <kern/sched_clutch.h>
42 
43 #if CONFIG_THREAD_GROUPS
44 
45 #define TG_MACHINE_DATA_ALIGN_SIZE (16)
46 
47 struct thread_group {
48 	uint64_t                tg_id;
49 	char                    tg_name[THREAD_GROUP_MAXNAME];
50 	struct os_refcnt        tg_refcount;
51 	struct {
52 		uint32_t                tg_flags;
53 		cluster_type_t          tg_recommendation;
54 	};
55 	/* We make the mpsc destroy chain link a separate field here because, while
56 	 * refs == 0 and the thread group is enqueued on the daemon queue, CLPC
57 	 * (which does not hold an explicit ref) still assumes that
58 	 * this thread group is alive and may provide recommendation changes/updates
59 	 * to it. As such, we need to make sure that all parts of the thread group
60 	 * structure remain valid.
61 	 */
62 	struct mpsc_queue_chain tg_destroy_link;
63 	queue_chain_t           tg_queue_chain;
64 #if CONFIG_SCHED_CLUTCH
65 	struct sched_clutch     tg_sched_clutch;
66 #endif /* CONFIG_SCHED_CLUTCH */
67 	uint8_t                 tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
68 } __attribute__((aligned(8)));
69 
70 static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
71 static uint32_t tg_count;
72 static queue_head_t tg_queue;
73 static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
74 static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
75 static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp);
76 
77 static uint64_t tg_next_id = 0;
78 static uint32_t tg_size;
79 static uint32_t tg_machine_data_size;
80 static uint32_t perf_controller_thread_group_immediate_ipi;
81 static struct thread_group *tg_system;
82 static struct thread_group *tg_background;
83 static struct thread_group *tg_vm;
84 static struct thread_group *tg_io_storage;
85 static struct thread_group *tg_perf_controller;
86 int tg_set_by_bankvoucher;
87 
88 static bool thread_group_retain_try(struct thread_group *tg);
89 
90 static struct mpsc_daemon_queue thread_group_deallocate_queue;
91 static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
92     __assert_only mpsc_daemon_queue_t dq);
93 
94 /*
95  * Initialize thread groups at boot
96  */
97 void
98 thread_group_init(void)
99 {
100 	// Get thread group structure extension from EDT or boot-args (which can override EDT)
101 	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
102 		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
103 			tg_machine_data_size = 8;
104 		}
105 	}
106 
107 	if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
108 		if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
109 			perf_controller_thread_group_immediate_ipi = 0;
110 		}
111 	}
112 
113 	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
114 	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
115 		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
116 			tg_set_by_bankvoucher = 1;
117 		}
118 	}
119 
120 	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
121 	if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
122 		tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
123 	}
124 	tg_machine_data_size = tg_size - sizeof(struct thread_group);
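	/*
	 * Illustrative sizing example (hypothetical numbers): if
	 * sizeof(struct thread_group) were 72 bytes and 12 extra machine-data
	 * bytes were requested, tg_size would start at 84 and be rounded up to
	 * 96, and tg_machine_data_size would be recomputed as 24.
	 */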
125 	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
126 	assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
127 	tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);
128 
129 	queue_head_init(tg_queue);
130 	tg_system = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
131 	thread_group_set_name(tg_system, "system");
132 	tg_background = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
133 	thread_group_set_name(tg_background, "background");
134 	lck_mtx_lock(&tg_lock);
135 	tg_next_id++;  // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
136 	lck_mtx_unlock(&tg_lock);
137 	tg_vm = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
138 	thread_group_set_name(tg_vm, "VM");
139 	tg_io_storage = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
140 	thread_group_set_name(tg_io_storage, "io storage");
141 	tg_perf_controller = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
142 	thread_group_set_name(tg_perf_controller, "perf_controller");
143 
144 	/*
145 	 * The thread group deallocation queue must be a thread call based queue
146 	 * because it is woken up from contexts where the thread lock is held. The
147  * only way to perform wakeups safely in those contexts is to wake up a
148  * thread call, which is guaranteed to be on a different waitq and would
149  * not hash onto the same global waitq that might be currently locked.
150 	 */
151 	mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
152 	    thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL,
153 	    MPSC_DAEMON_INIT_NONE);
154 }
155 
156 #if CONFIG_SCHED_CLUTCH
157 /*
158  * sched_clutch_for_thread
159  *
160  * The routine provides a back linkage from the thread to the
161  * sched_clutch it belongs to. This relationship is based on the
162  * thread group membership of the thread. Since that membership is
163  * changed from the thread context with the thread lock held, this
164  * linkage should be looked at only with the thread lock held or
165  * when the thread cannot be running (e.g. the thread is in the
166  * runq and is being removed as part of thread_select()).
167  */
168 sched_clutch_t
169 sched_clutch_for_thread(thread_t thread)
170 {
171 	assert(thread->thread_group != NULL);
172 	return &(thread->thread_group->tg_sched_clutch);
173 }
174 
175 sched_clutch_t
176 sched_clutch_for_thread_group(struct thread_group *thread_group)
177 {
178 	return &(thread_group->tg_sched_clutch);
179 }
180 
181 #endif /* CONFIG_SCHED_CLUTCH */
182 
183 uint64_t
184 thread_group_id(struct thread_group *tg)
185 {
186 	return (tg == NULL) ? 0 : tg->tg_id;
187 }
188 
189 #if CONFIG_PREADOPT_TG
190 static inline bool
191 thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
192 {
193 	return t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
194 }
195 
196 static inline void
197 thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
198 {
199 	t->sched_flags |= TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
200 }
201 
202 static inline void
203 thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
204 {
205 	t->sched_flags &= ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
206 }
207 #endif
208 
209 /*
210  * Use a mutex (tg_flags_update_lock) to protect all thread group flag updates.
211  * The lock should not have heavy contention since these flag updates should
212  * be infrequent. If this lock has contention issues, it should be changed to
213  * a per thread-group lock.
214  *
215  * The lock protects the flags field in the thread_group structure. It is also
216  * held while doing callouts to CLPC to reflect these flag changes.
217  */
218 
219 void
220 thread_group_flags_update_lock(void)
221 {
222 	lck_mtx_lock(&tg_flags_update_lock);
223 }
224 
225 void
226 thread_group_flags_update_unlock(void)
227 {
228 	lck_mtx_unlock(&tg_flags_update_lock);
229 }
230 
231 /*
232  * Inform platform code about already existing thread groups
233  * or ask it to free state for all thread groups
234  */
235 void
236 thread_group_resync(boolean_t create)
237 {
238 	struct thread_group *tg;
239 
240 	thread_group_flags_update_lock();
241 	lck_mtx_lock(&tg_lock);
242 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
243 		if (create) {
244 			machine_thread_group_init(tg);
245 		} else {
246 			machine_thread_group_deinit(tg);
247 		}
248 	}
249 	lck_mtx_unlock(&tg_lock);
250 	thread_group_flags_update_unlock();
251 }
252 
253 /*
254  * Create new thread group and add new reference to it.
255  */
256 struct thread_group *
257 thread_group_create_and_retain(uint32_t flags)
258 {
259 	struct thread_group *tg;
260 
261 	tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
262 	assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);
263 
264 	tg->tg_flags = flags;
265 
266 #if CONFIG_SCHED_CLUTCH
267 	/*
268 	 * The clutch scheduler maintains a bunch of runqs per thread group. For
269 	 * each thread group it maintains a sched_clutch structure. The lifetime
270 	 * of that structure is tied directly to the lifetime of the thread group.
271 	 */
272 	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);
273 
274 #endif /* CONFIG_SCHED_CLUTCH */
275 
276 	lck_mtx_lock(&tg_lock);
277 	tg->tg_id = tg_next_id++;
278 	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
279 	os_ref_init(&tg->tg_refcount, NULL);
280 	tg_count++;
281 	enqueue_tail(&tg_queue, &tg->tg_queue_chain);
282 
283 	// call machine layer init before this thread group becomes visible
284 	machine_thread_group_init(tg);
285 	lck_mtx_unlock(&tg_lock);
286 
287 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));
288 	if (flags) {
289 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS), thread_group_id(tg), thread_group_get_flags(tg), 0);
290 	}
291 
292 	return tg;
293 }
294 
295 /*
296  * Point newly created thread to its home thread group
297  */
298 void
299 thread_group_init_thread(thread_t t, task_t task)
300 {
301 	struct thread_group *tg = task_coalition_get_thread_group(task);
302 	t->thread_group = tg;
303 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
304 	    THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
305 }
306 
307 /*
308  * Set thread group name
309  */
310 void
311 thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
312 {
313 	if (name == NULL) {
314 		return;
315 	}
316 	if (!thread_group_retain_try(tg)) {
317 		return;
318 	}
319 	if (name[0] != '\0') {
320 		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
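		/*
		 * The (not necessarily NUL-terminated) name is emitted below as
		 * three machine-word-sized chunks in the tracepoint arguments.
		 */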
321 #if defined(__LP64__)
322 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
323 		    tg->tg_id,
324 		    *(uint64_t*)(void*)&tg->tg_name[0],
325 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
326 		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
327 		    );
328 #else /* defined(__LP64__) */
329 		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
330 		    tg->tg_id,
331 		    *(uint32_t*)(void*)&tg->tg_name[0],
332 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
333 		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
334 		    );
335 #endif /* defined(__LP64__) */
336 	}
337 	thread_group_release(tg);
338 }
339 
340 void
341 thread_group_set_flags(struct thread_group *tg, uint32_t flags)
342 {
343 	thread_group_flags_update_lock();
344 	thread_group_set_flags_locked(tg, flags);
345 	thread_group_flags_update_unlock();
346 }
347 
348 /*
349  * Return true if flags are valid, false otherwise.
350  * Some flags are mutually exclusive.
351  */
352 boolean_t
353 thread_group_valid_flags(uint32_t flags)
354 {
355 	const uint32_t sflags = flags & ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
356 	const uint32_t eflags = flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
357 
358 	if ((sflags & THREAD_GROUP_FLAGS_SHARED) != sflags) {
359 		return false;
360 	}
361 
362 	if ((eflags & THREAD_GROUP_FLAGS_EXCLUSIVE) != eflags) {
363 		return false;
364 	}
365 
366 	/* Only one of the exclusive flags may be set; ((eflags - 1) & eflags) is non-zero iff more than one bit is set. */
367 	if (((eflags - 1) & eflags) != 0) {
368 		return false;
369 	}
370 
371 	return true;
372 }
373 
374 void
375 thread_group_clear_flags(struct thread_group *tg, uint32_t flags)
376 {
377 	thread_group_flags_update_lock();
378 	thread_group_clear_flags_locked(tg, flags);
379 	thread_group_flags_update_unlock();
380 }
381 
382 /*
383  * Set thread group flags and perform related actions.
384  * The tg_flags_update_lock should be held.
385  * Currently supported flags are:
386  * Exclusive Flags:
387  * - THREAD_GROUP_FLAGS_EFFICIENT
388  * - THREAD_GROUP_FLAGS_APPLICATION
389  * - THREAD_GROUP_FLAGS_CRITICAL
390  * Shared Flags:
391  * - THREAD_GROUP_FLAGS_UI_APP
392  */
393 
394 void
395 thread_group_set_flags_locked(struct thread_group *tg, uint32_t flags)
396 {
397 	if (!thread_group_valid_flags(flags)) {
398 		panic("thread_group_set_flags: Invalid flags %u", flags);
399 	}
400 
401 	/* Disallow any exclusive flags from being set after creation, with the
402 	 * exception of moving from default to application */
403 	if ((flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) &&
404 	    !((flags & THREAD_GROUP_FLAGS_APPLICATION) &&
405 	    (tg->tg_flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) ==
406 	    THREAD_GROUP_FLAGS_DEFAULT)) {
407 		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
408 	}
409 	if ((tg->tg_flags & flags) == flags) {
410 		return;
411 	}
412 
413 	if (tg == tg_system) {
414 		/*
415 		 * The system TG is used for kernel and launchd. It is also used
416 		 * for processes which are getting spawned and do not have a home
417 		 * TG yet (see task_coalition_get_thread_group()). Make sure the
418 		 * policies for those processes do not update the flags for the
419 		 * system TG. The flags for this thread group should only be set
420 		 * at creation via thread_group_create_and_retain().
421 		 */
422 		return;
423 	}
424 
425 	__kdebug_only uint64_t old_flags = tg->tg_flags;
426 	tg->tg_flags |= flags;
427 
428 	machine_thread_group_flags_update(tg, tg->tg_flags);
429 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
430 	    tg->tg_id, tg->tg_flags, old_flags);
431 }
432 
433 /*
434  * Clear thread group flags and perform related actions
435  * The tg_flags_update_lock should be held.
436  * Currently supported flags are:
437  * Exclusive Flags:
438  * - THREAD_GROUP_FLAGS_EFFICIENT
439  * - THREAD_GROUP_FLAGS_APPLICATION
440  * - THREAD_GROUP_FLAGS_CRITICAL
441  * Shared Flags:
442  * - THREAD_GROUP_FLAGS_UI_APP
443  */
444 
445 void
446 thread_group_clear_flags_locked(struct thread_group *tg, uint32_t flags)
447 {
448 	if (!thread_group_valid_flags(flags)) {
449 		panic("thread_group_clear_flags: Invalid flags %u", flags);
450 	}
451 
452 	/* Disallow any exclusive flags from being cleared */
453 	if (flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) {
454 		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
455 	}
456 	if ((tg->tg_flags & flags) == 0) {
457 		return;
458 	}
459 
460 	__kdebug_only uint64_t old_flags = tg->tg_flags;
461 	tg->tg_flags &= ~flags;
462 	machine_thread_group_flags_update(tg, tg->tg_flags);
463 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
464 	    tg->tg_id, tg->tg_flags, old_flags);
465 }
466 
467 
468 
469 /*
470  * Find thread group with specified name and put new reference to it.
471  */
472 struct thread_group *
473 thread_group_find_by_name_and_retain(char *name)
474 {
475 	struct thread_group *result = NULL;
476 
477 	if (name == NULL) {
478 		return NULL;
479 	}
480 
481 	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
482 		return thread_group_retain(tg_system);
483 	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
484 		return thread_group_retain(tg_background);
485 	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
486 		return thread_group_retain(tg_perf_controller);
487 	}
488 
489 	struct thread_group *tg;
490 	lck_mtx_lock(&tg_lock);
491 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
492 		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
493 		    thread_group_retain_try(tg)) {
494 			result = tg;
495 			break;
496 		}
497 	}
498 	lck_mtx_unlock(&tg_lock);
499 	return result;
500 }
501 
502 /*
503  * Find thread group with specified ID and add new reference to it.
504  */
505 struct thread_group *
506 thread_group_find_by_id_and_retain(uint64_t id)
507 {
508 	struct thread_group *tg = NULL;
509 	struct thread_group *result = NULL;
510 
511 	switch (id) {
512 	case THREAD_GROUP_SYSTEM:
513 		result = tg_system;
514 		thread_group_retain(tg_system);
515 		break;
516 	case THREAD_GROUP_BACKGROUND:
517 		result = tg_background;
518 		thread_group_retain(tg_background);
519 		break;
520 	case THREAD_GROUP_VM:
521 		result = tg_vm;
522 		thread_group_retain(tg_vm);
523 		break;
524 	case THREAD_GROUP_IO_STORAGE:
525 		result = tg_io_storage;
526 		thread_group_retain(tg_io_storage);
527 		break;
528 	case THREAD_GROUP_PERF_CONTROLLER:
529 		result = tg_perf_controller;
530 		thread_group_retain(tg_perf_controller);
531 		break;
532 	default:
533 		lck_mtx_lock(&tg_lock);
534 		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
535 			if (tg->tg_id == id && thread_group_retain_try(tg)) {
536 				result = tg;
537 				break;
538 			}
539 		}
540 		lck_mtx_unlock(&tg_lock);
541 	}
542 	return result;
543 }
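
/*
 * Hypothetical usage sketch for the lookup routines above: the reference they
 * return must eventually be dropped by the caller, e.g.
 *
 *	struct thread_group *tg = thread_group_find_by_id_and_retain(id);
 *	if (tg != NULL) {
 *		// ... use tg ...
 *		thread_group_release(tg);
 *	}
 */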
544 
545 /*
546  * Add new reference to specified thread group
547  */
548 struct thread_group *
549 thread_group_retain(struct thread_group *tg)
550 {
551 	os_ref_retain(&tg->tg_refcount);
552 	return tg;
553 }
554 
555 /*
556  * Similar to thread_group_retain, but fails for thread groups with a
557  * zero reference count. Returns true if retained successfully.
558  */
559 static bool
560 thread_group_retain_try(struct thread_group *tg)
561 {
562 	return os_ref_retain_try(&tg->tg_refcount);
563 }
564 
565 static void
566 thread_group_deallocate_complete(struct thread_group *tg)
567 {
568 	lck_mtx_lock(&tg_lock);
569 	tg_count--;
570 	remqueue(&tg->tg_queue_chain);
571 	lck_mtx_unlock(&tg_lock);
572 	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
573 	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
574 #if defined(__LP64__)
575 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
576 	    tg->tg_id,
577 	    *(uint64_t*)(void*)&tg->tg_name[0],
578 	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
579 	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
580 	    );
581 #else /* defined(__LP64__) */
582 	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
583 	    tg->tg_id,
584 	    *(uint32_t*)(void*)&tg->tg_name[0],
585 	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
586 	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
587 	    );
588 #endif /* defined(__LP64__) */
589 	machine_thread_group_deinit(tg);
590 #if CONFIG_SCHED_CLUTCH
591 	sched_clutch_destroy(&(tg->tg_sched_clutch));
592 #endif /* CONFIG_SCHED_CLUTCH */
593 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
594 	zfree(tg_zone, tg);
595 }
596 
597 /*
598  * Drop a reference to specified thread group
599  */
600 void
601 thread_group_release(struct thread_group *tg)
602 {
603 	if (os_ref_release(&tg->tg_refcount) == 0) {
604 		thread_group_deallocate_complete(tg);
605 	}
606 }
607 
608 void
609 thread_group_release_live(struct thread_group *tg)
610 {
611 	os_ref_release_live(&tg->tg_refcount);
612 }
613 
614 static void
615 thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
616 {
617 	assert(dq == &thread_group_deallocate_queue);
618 	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);
619 
620 	thread_group_deallocate_complete(tg);
621 }
622 
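/*
 * Drop a reference like thread_group_release(), but defer the final teardown
 * to thread_group_deallocate_queue so this is safe to call from contexts
 * (e.g. with the thread lock held) where the group cannot be freed inline.
 */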
623 void
624 thread_group_deallocate_safe(struct thread_group *tg)
625 {
626 	if (os_ref_release(&tg->tg_refcount) == 0) {
627 		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
628 		    MPSC_QUEUE_NONE);
629 	}
630 }
631 
632 /*
633  * Get thread's current thread group
634  */
635 inline struct thread_group *
636 thread_group_get(thread_t t)
637 {
638 	return t->thread_group;
639 }
640 
641 struct thread_group *
642 thread_group_get_home_group(thread_t t)
643 {
644 	return task_coalition_get_thread_group(get_threadtask(t));
645 }
646 
647 /*
648  * The thread group is resolved according to a hierarchy:
649  *
650  * 1) work interval specified group (explicit API)
651  * 2) Auto-join thread group (wakeup tracking for special work intervals)
652  * 3) bank voucher carried group (implicitly set)
653  * 4) Preadopt thread group (if any)
654  * 5) coalition default thread group (ambient)
655  *
656  * Returns true if the thread's thread group needs to be changed; the resolved
657  * TG is returned through the out param. See also
658  * thread_mark_thread_group_hierarchy_resolved and
659  * thread_set_resolved_thread_group
660  *
661  * Caller must hold the thread lock, with interrupts disabled. The thread does
662  * not have to be the current thread.
663  */
664 static bool
665 thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
666 {
667 	struct thread_group *cur_tg, *tg;
668 	cur_tg = t->thread_group;
669 
670 	tg = thread_group_get_home_group(t);
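	/*
	 * Start from the ambient home group; each higher-priority source checked
	 * below overrides it, so the last assignment that applies wins.
	 */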
671 
672 #if CONFIG_PREADOPT_TG
673 	if (t->preadopt_thread_group) {
674 		tg = t->preadopt_thread_group;
675 	}
676 #endif
677 	if (t->bank_thread_group) {
678 		tg = t->bank_thread_group;
679 	}
680 
681 	if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
682 		if (t->auto_join_thread_group) {
683 			tg = t->auto_join_thread_group;
684 		}
685 	} else {
686 		if (t->work_interval_thread_group) {
687 			tg = t->work_interval_thread_group;
688 		}
689 	}
690 
691 	*resolved_tg = tg;
692 	return tg != cur_tg;
693 }
694 
695 #if CONFIG_PREADOPT_TG
696 
697 /*
698  * This function is always called after the hierarchy has been resolved. The
699  * caller holds the thread lock
700  */
701 static inline void
702 thread_assert_has_valid_thread_group(thread_t t)
703 {
704 	__assert_only struct thread_group *home_tg = thread_group_get_home_group(t);
705 
706 	assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);
707 
708 	__assert_only struct thread_group *resolved_tg;
709 	assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);
710 
711 	assert((t->thread_group == home_tg) ||
712 	    (t->thread_group == t->preadopt_thread_group) ||
713 	    (t->thread_group == t->bank_thread_group) ||
714 	    (t->thread_group == t->auto_join_thread_group) ||
715 	    (t->thread_group == t->work_interval_thread_group));
716 }
717 #endif
718 
719 /*
720  * This function is called when the thread group hierarchy on the thread_t is
721  * resolved and t->thread_group is the result of the hierarchy resolution. Once
722  * this has happened, there is state that needs to be cleared up which is
723  * handled by this function.
724  *
725  * Prior to this call, we should have either
726  * a) Resolved the hierarchy and discovered no change needed
727  * b) Resolved the hierarchy and modified the t->thread_group
728  */
729 static void
730 thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
731 {
732 #if CONFIG_PREADOPT_TG
733 	/*
734 	 * We have just reevaluated the thread's hierarchy so we don't need to do it
735 	 * again later.
736 	 */
737 	thread_clear_reevaluate_tg_hierarchy_locked(t);
738 
739 	/*
740 	 * Clear the old_preadopt_thread_group field whose sole purpose was to make
741 	 * sure that t->thread_group didn't have a dangling pointer.
742 	 */
743 	thread_assert_has_valid_thread_group(t);
744 
745 	if (t->old_preadopt_thread_group) {
746 		thread_group_deallocate_safe(t->old_preadopt_thread_group);
747 		t->old_preadopt_thread_group = NULL;
748 	}
749 #endif
750 }
751 
752 /*
753  * Called with thread lock held, always called on self.  This function simply
754  * moves the thread to the right clutch scheduler bucket and informs CLPC of the
755  * change
756  */
757 static void
758 thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
759     struct thread_group * __unused new_tg)
760 {
761 	assert(current_thread() == t);
762 	assert(old_tg != new_tg);
763 	assert(t->thread_group == new_tg);
764 
765 	uint64_t ctime = mach_approximate_time();
766 	uint64_t arg1, arg2;
767 	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
768 	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
769 }
770 
771 /*
772  * Called on any thread with thread lock. Updates the thread_group field on the
773  * thread with the resolved thread group and always makes the necessary clutch
774  * scheduler callouts. If the thread group is being modified on self,
775  * then also make necessary CLPC callouts.
776  */
777 static void
778 thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
779     struct thread_group *resolved_tg, bool on_self)
780 {
781 	t->thread_group = resolved_tg;
782 
783 	/* Thread is either running already or is runnable but not on a runqueue */
784 	assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
785 	assert(t->runq == PROCESSOR_NULL);
786 
787 	struct thread_group *home_tg = thread_group_get_home_group(t);
788 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
789 	    thread_group_id(old_tg), thread_group_id(resolved_tg),
790 	    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
791 
792 #if CONFIG_PREADOPT_TG
793 	if (resolved_tg == t->preadopt_thread_group) {
794 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
795 		    thread_group_id(old_tg), thread_group_id(resolved_tg),
796 		    thread_tid(t), thread_group_id(home_tg));
797 	}
798 #endif
799 
800 #if CONFIG_SCHED_CLUTCH
801 	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
802 	sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
803 	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
804 		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
805 	}
806 #endif
807 
808 	if (on_self) {
809 		assert(t == current_thread());
810 		thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
811 	}
812 
813 	thread_mark_thread_group_hierarchy_resolved(t);
814 }
815 
816 /* Caller has thread lock. Always called on self */
817 static void
818 thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
819 {
820 	assert(current_thread() == t);
821 
822 #if CONFIG_PREADOPT_TG
823 	struct thread_group *preadopt_tg = NULL;
824 	if (clear_preadopt) {
825 		if (t->preadopt_thread_group) {
826 			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
827 			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);
828 
829 			preadopt_tg = t->preadopt_thread_group;
830 			t->preadopt_thread_group = NULL;
831 		}
832 	}
833 #endif
834 
835 	struct thread_group *resolved_tg = NULL;
836 	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
837 
838 	if (needs_change) {
839 		struct thread_group *old_tg = t->thread_group;
840 		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
841 	}
842 
843 	/*
844 	 * Regardless of whether we modified the t->thread_group above or not, the
845 	 * hierarchy is now resolved
846 	 */
847 	thread_mark_thread_group_hierarchy_resolved(t);
848 
849 #if CONFIG_PREADOPT_TG
850 	if (preadopt_tg) {
851 		thread_group_deallocate_safe(preadopt_tg);
852 	}
853 #endif
854 }
855 
856 /*
857  * Caller has thread lock, never called on self, always called on a thread not
858  * on a runqueue. This is called from sched_prim.c. Counter part for calling on
859  * self is thread_resolve_thread_group_hierarchy_self
860  */
861 #if CONFIG_PREADOPT_TG
862 void
863 thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
864 {
865 	assert(t != current_thread());
866 	assert(t->runq == NULL);
867 
868 	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
869 		struct thread_group *resolved_tg = NULL;
870 
871 		bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
872 		if (needs_change) {
873 			struct thread_group *old_tg = t->thread_group;
874 			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
875 		}
876 
877 		/*
878 		 * Regardless of whether we modified the t->thread_group above or not,
879 		 * the hierarchy is now resolved
880 		 */
881 		thread_mark_thread_group_hierarchy_resolved(t);
882 	}
883 }
884 #endif
885 
886 #if CONFIG_PREADOPT_TG
887 /*
888  * The thread being passed can be the current thread and it can also be another
889  * thread which is running on another core. This function is called with spin
890  * locks held (kq and wq lock) but the thread lock is not held by caller.
891  *
892  * The thread always takes a +1 on the thread group and will release the
893  * previous preadoption thread group's reference or stash it.
894  */
895 void
896 thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
897 {
898 	spl_t s = splsched();
899 	thread_lock(t);
900 
901 	/*
902 	 * Assert that this is never called on WindowServer when it has already
903 	 * issued a block callout to CLPC.
904 	 *
905 	 * This should never happen because we don't ever call
906 	 * thread_set_preadopt_thread_group on a servicer after going out to
907 	 * userspace unless we are doing so to/after an unbind
908 	 */
909 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
910 
911 	struct thread_group *old_tg = t->thread_group;
912 	struct thread_group *home_tg = thread_group_get_home_group(t);
913 
914 	/*
915 	 * Since the preadoption thread group can disappear from under you, we need
916 	 * to make sure that the thread_group pointer is always pointing to valid
917 	 * memory.
918 	 *
919 	 * We run the risk of the thread group pointer pointing to dangling memory
920 	 * when the following happens:
921 	 *
922 	 * a) We update the preadopt_thread_group
923 	 * b) We resolve hierarchy and need to change the resolved_thread_group
924 	 * c) For some reason, we are not able to do so and we need to set the
925 	 * resolved thread group later.
926 	 */
927 
928 	/* take the ref from the thread */
929 	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;
930 
931 	if (tg == NULL) {
932 		t->preadopt_thread_group = NULL;
933 		if (old_preadopt_tg != NULL) {
934 			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
935 			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
936 		}
937 	} else {
938 		t->preadopt_thread_group = thread_group_retain(tg);
939 	}
940 
941 	struct thread_group *resolved_tg = NULL;
942 	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
943 	if (!needs_change) {
944 		/*
945 		 * Setting preadoption thread group didn't change anything, simply mark
946 		 * the hierarchy as resolved and exit.
947 		 */
948 		thread_mark_thread_group_hierarchy_resolved(t);
949 		goto out;
950 	}
951 
952 	if (t != current_thread()) {
953 		/*
954 		 * We're modifying the thread group of another thread, we need to take
955 		 * action according to the state of the other thread.
956 		 *
957 		 * If the thread is runnable but not yet running, try removing it from
958 		 * the runq, modify its TG and then reinsert it for reevaluation. If it
959 		 * isn't runnable (it is already running, started running concurrently,
960 		 * or is waiting), then set a flag so that the thread reevaluates its
961 		 * own hierarchy the next time it is inserted into a runq.
962 		 */
963 		if ((t->state & TH_RUN) && (t->runq != PROCESSOR_NULL)) {
964 			/* Thread is runnable but not running */
965 
966 			bool removed_from_runq = thread_run_queue_remove(t);
967 			if (removed_from_runq) {
968 				thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
969 
970 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
971 				    thread_group_id(old_tg), thread_group_id(tg),
972 				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
973 
974 				thread_run_queue_reinsert(t, SCHED_TAILQ);
975 			} else {
976 				/*
977 				 * We failed to remove it from the runq - it probably started
978 				 * running, let the thread reevaluate the next time it gets
979 				 * enqueued on a runq
980 				 */
981 				thread_set_reevaluate_tg_hierarchy_locked(t);
982 
983 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
984 				    thread_group_id(old_tg), thread_group_id(tg),
985 				    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
986 			}
987 		} else {
988 			/*
989 			 * The thread is not runnable or it is running already - let the
990 			 * thread reevaluate the next time it gets enqueued on a runq
991 			 */
992 			thread_set_reevaluate_tg_hierarchy_locked(t);
993 
994 			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
995 			    thread_group_id(old_tg), thread_group_id(tg),
996 			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
997 		}
998 	} else {
999 		/* We're modifying thread group on ourselves */
1000 		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
1001 
1002 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
1003 		    thread_group_id(old_tg), thread_group_id(tg),
1004 		    thread_tid(t), thread_group_id(home_tg));
1005 	}
1006 
1007 out:
1008 	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
1009 		assert(t->thread_group == old_tg);
1010 		/*
1011 		 * We need to reevaluate TG hierarchy later as a result of this
1012 		 * `thread_set_preadopt_thread_group` operation. This means that the
1013 		 * thread group on the thread was pointing to either the home thread
1014 		 * group, the preadoption thread group we just replaced, or the old
1015 		 * preadoption thread group stashed on the thread.
1016 		 */
1017 		assert(t->thread_group == home_tg ||
1018 		    t->thread_group == old_preadopt_tg ||
1019 		    t->old_preadopt_thread_group);
1020 
1021 		if (t->thread_group == old_preadopt_tg) {
1022 			/*
1023 			 * t->thread_group is pointing to the preadopt thread group we just
1024 			 * replaced. This means the hierarchy was resolved before this call.
1025 			 * Assert that there was no old_preadopt_thread_group on the thread.
1026 			 */
1027 			assert(t->old_preadopt_thread_group == NULL);
1028 			/*
1029 			 * Since t->thread_group is still pointing to the old preadopt thread
1030 			 * group - we need to keep it alive until we reevaluate the hierarchy
1031 			 * next
1032 			 */
1033 			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
1034 		} else if (old_preadopt_tg != NULL) {
1035 			thread_group_deallocate_safe(old_preadopt_tg);
1036 		}
1037 	} else {
1038 		/* We resolved the hierarchy just now */
1039 		thread_assert_has_valid_thread_group(t);
1040 
1041 		/*
1042 		 * We don't need the old preadopt thread group that we stashed in our
1043 		 * local variable, drop it.
1044 		 */
1045 		if (old_preadopt_tg) {
1046 			thread_group_deallocate_safe(old_preadopt_tg);
1047 		}
1048 	}
1049 	thread_unlock(t);
1050 	splx(s);
1051 	return;
1052 }
1053 
1054 #endif
1055 
1056 /*
1057  * thread_set_thread_group()
1058  *
1059  * Caller must guarantee lifetime of the thread group for the life of the call -
1060  * this overrides the thread group without going through the hierarchy
1061  * resolution. This is for special thread groups like the VM and IO thread
1062  * groups only.
1063  */
1064 static void
1065 thread_set_thread_group(thread_t t, struct thread_group *tg)
1066 {
1067 	struct thread_group *home_tg = thread_group_get_home_group(t);
1068 	struct thread_group *old_tg = NULL;
1069 
1070 	spl_t s = splsched();
1071 	old_tg = t->thread_group;
1072 
1073 	if (old_tg != tg) {
1074 		thread_lock(t);
1075 
1076 		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1077 		t->thread_group = tg;
1078 
1079 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
1080 		    thread_group_id(old_tg), thread_group_id(tg),
1081 		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
1082 
1083 		thread_notify_thread_group_change_self(t, old_tg, tg);
1084 
1085 		thread_unlock(t);
1086 	}
1087 
1088 	splx(s);
1089 }
1090 
1091 /* Called without the thread lock held, called on current thread */
1092 void
1093 thread_group_set_bank(thread_t t, struct thread_group *tg)
1094 {
1095 	assert(current_thread() == t);
1096 	/* boot arg disables groups in bank */
1097 	if (tg_set_by_bankvoucher == FALSE) {
1098 		return;
1099 	}
1100 
1101 	spl_t s = splsched();
1102 	thread_lock(t);
1103 
1104 	/* This is a borrowed reference from the current bank voucher */
1105 	t->bank_thread_group = tg;
1106 
1107 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1108 	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1109 
1110 	thread_unlock(t);
1111 	splx(s);
1112 }
1113 
1114 #if CONFIG_SCHED_AUTO_JOIN
1115 /*
1116  * thread_group_set_autojoin_thread_group_locked()
1117  *
1118  * Sets the thread group of a thread based on auto-join rules and reevaluates
1119  * the hierarchy.
1120  *
1121  * Preconditions:
1122  * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
1123  * - Thread must be locked by the caller already
1124  */
1125 void
1126 thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
1127 {
1128 	assert(t->runq == PROCESSOR_NULL);
1129 
1130 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1131 	t->auto_join_thread_group = tg;
1132 
1133 	struct thread_group *resolved_tg = NULL;
1134 	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
1135 
1136 	if (needs_change) {
1137 		struct thread_group *old_tg = t->thread_group;
1138 		struct thread_group *home_tg = thread_group_get_home_group(t);
1139 
1140 		t->thread_group = resolved_tg;
1141 
1142 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
1143 		    thread_group_id(old_tg), thread_group_id(resolved_tg),
1144 		    thread_tid(t), thread_group_id(home_tg));
1145 		/*
1146 		 * If the thread group is being changed for the current thread, call out
1147 		 * to CLPC to update the thread's information at that layer. This makes
1148 		 * sure CLPC has consistent state when the current thread is going
1149 		 * off-core.
1150 		 *
1151 		 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
1152 		 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self)
1153 		 */
1154 		if (t == current_thread()) {
1155 			uint64_t ctime = mach_approximate_time();
1156 			uint64_t arg1, arg2;
1157 			machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
1158 			machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
1159 		}
1160 	}
1161 
1162 	thread_mark_thread_group_hierarchy_resolved(t);
1163 }
1164 #endif
1165 
1166 /* Thread is not locked. Thread is self */
1167 void
1168 thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
1169 {
1170 	assert(current_thread() == t);
1171 	assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));
1172 
1173 	/*
1174 	 * We have a work interval, we don't need the preadoption thread group
1175 	 * anymore (i.e., it shouldn't be available for us to jump back to it after
1176 	 * the thread leaves the work interval)
1177 	 */
1178 	spl_t s = splsched();
1179 	thread_lock(t);
1180 
1181 	t->work_interval_thread_group = tg;
1182 	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
1183 
1184 	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);
1185 
1186 	thread_unlock(t);
1187 	splx(s);
1188 }
1189 
1190 inline cluster_type_t
1191 thread_group_recommendation(struct thread_group *tg)
1192 {
1193 	if (tg == NULL) {
1194 		return CLUSTER_TYPE_SMP;
1195 	} else {
1196 		return tg->tg_recommendation;
1197 	}
1198 }
1199 
1200 inline uint64_t
1201 thread_group_get_id(struct thread_group *tg)
1202 {
1203 	return tg->tg_id;
1204 }
1205 
1206 uint32_t
1207 thread_group_count(void)
1208 {
1209 	return tg_count;
1210 }
1211 
1212 /*
1213  * Can only be called while tg cannot be destroyed
1214  */
1215 inline const char*
1216 thread_group_get_name(struct thread_group *tg)
1217 {
1218 	return tg->tg_name;
1219 }
1220 
1221 inline void *
1222 thread_group_get_machine_data(struct thread_group *tg)
1223 {
1224 	return &tg->tg_machine_data;
1225 }
1226 
1227 inline uint32_t
1228 thread_group_machine_data_size(void)
1229 {
1230 	return tg_machine_data_size;
1231 }
1232 
1233 inline boolean_t
1234 thread_group_uses_immediate_ipi(struct thread_group *tg)
1235 {
1236 	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
1237 }
1238 
1239 kern_return_t
1240 thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
1241 {
1242 	struct thread_group *tg;
1243 	int i = 0;
1244 	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
1245 		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
1246 			return KERN_FAILURE;
1247 		}
1248 		callout(arg, i, tg);
1249 		i++;
1250 	}
1251 	return KERN_SUCCESS;
1252 }
1253 
1254 void
1255 thread_group_join_io_storage(void)
1256 {
1257 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
1258 	assert(tg != NULL);
1259 	thread_set_thread_group(current_thread(), tg);
1260 }
1261 
1262 void
1263 thread_group_join_perf_controller(void)
1264 {
1265 	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
1266 	assert(tg != NULL);
1267 	thread_set_thread_group(current_thread(), tg);
1268 }
1269 
1270 void
1271 thread_group_vm_add(void)
1272 {
1273 	assert(tg_vm != NULL);
1274 	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
1275 }
1276 
1277 uint32_t
1278 thread_group_get_flags(struct thread_group *tg)
1279 {
1280 	return tg->tg_flags;
1281 }
1282 
1283 void
1284 thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
1285 {
1286 	/*
1287 	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
1288 	 * where a thread/thread group needs to be placed, it is important to use
1289 	 * atomic operations to update the recommendation.
1290 	 */
1291 	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
1292 }
1293 
1294 #if CONFIG_SCHED_EDGE
1295 
1296 int sched_edge_restrict_ut = 1;
1297 int sched_edge_restrict_bg = 1;
1298 
1299 void
1300 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1301 {
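	/*
	 * machine_data points at the tg_machine_data[] field of the enclosing
	 * thread group; subtracting its offset recovers the thread_group pointer.
	 */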
1302 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1303 	/*
1304 	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
1305 	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
1306 	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
1307 	 */
1308 	assert(new_recommendation != CLUSTER_TYPE_SMP);
1309 	/*
1310 	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until the new CLPC
1311 	 * routine is being called, fake out the call from the old CLPC interface.
1312 	 */
1313 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
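	/*
	 * The mapping below assumes a two-cluster AMP topology: if the new
	 * recommendation matches the type of cluster 0, prefer cluster 0 for the
	 * bucket, otherwise prefer cluster 1.
	 */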
1314 	/*
1315 	 * For all buckets higher than UT, apply the recommendation to the thread group bucket
1316 	 */
1317 	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
1318 		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1319 	}
1320 	/* For UT & BG QoS, set the recommendation only if they haven't been restricted via sysctls */
1321 	if (!sched_edge_restrict_ut) {
1322 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1323 	}
1324 	if (!sched_edge_restrict_bg) {
1325 		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
1326 	}
1327 	sched_perfcontrol_preferred_cluster_options_t options = 0;
1328 	if (new_recommendation == CLUSTER_TYPE_P) {
1329 		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
1330 	}
1331 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1332 }
1333 
1334 void
1335 sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
1336 {
1337 	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
1338 }
1339 
1340 void
1341 sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
1342 {
1343 	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
1344 }
1345 
1346 void
1347 sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
1348     uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
1349 {
1350 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1351 	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
1352 		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
1353 		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1354 		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
1355 		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
1356 		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
1357 		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
1358 	};
1359 	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
1360 }
1361 
1362 #else /* CONFIG_SCHED_EDGE */
1363 
1364 void
1365 sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
1366 {
1367 	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
1368 	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
1369 }
1370 
1371 void
1372 sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
1373 {
1374 }
1375 
1376 void
1377 sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
1378 {
1379 }
1380 
1381 void
1382 sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
1383     __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
1384 {
1385 }
1386 
1387 #endif /* CONFIG_SCHED_EDGE */
1388 
1389 /*
1390  * Can only be called while tg cannot be destroyed.
1391  * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
1392  */
1393 const char*
1394 sched_perfcontrol_thread_group_get_name(void *machine_data)
1395 {
1396 	struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
1397 	return thread_group_get_name(tg);
1398 }
1399 
1400 #endif /* CONFIG_THREAD_GROUPS */
1401