/* xref: /xnu-10063.121.3/osfmk/kern/thread_group.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa) */
/*
 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/machine.h>
#include <kern/coalition.h>
#include <sys/errno.h>
#include <kern/queue.h>
#include <kern/locks.h>
#include <kern/thread_group.h>
#include <kern/sched_clutch.h>

#if CONFIG_THREAD_GROUPS

#define TG_MACHINE_DATA_ALIGN_SIZE (16)

struct thread_group {
	uint64_t                tg_id;
	char                    tg_name[THREAD_GROUP_MAXNAME];
	struct os_refcnt        tg_refcount;
	struct {
		uint32_t                tg_flags;
		cluster_type_t          tg_recommendation;
	};
	/*
	 * We make the mpsc destroy chain link a separate field here because,
	 * while refs == 0 and the thread group is enqueued on the daemon queue,
	 * CLPC (which does not hold an explicit ref) still assumes that this
	 * thread group is alive and may deliver recommendation changes/updates
	 * to it. As such, we need to make sure that all parts of the thread
	 * group structure remain valid.
	 */
	struct mpsc_queue_chain tg_destroy_link;
	queue_chain_t           tg_queue_chain;
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch     tg_sched_clutch;
#endif /* CONFIG_SCHED_CLUTCH */
	uint8_t                 tg_machine_data[] __attribute__((aligned(TG_MACHINE_DATA_ALIGN_SIZE)));
} __attribute__((aligned(8)));

static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
static uint32_t tg_count;
static queue_head_t tg_queue;
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
static LCK_MTX_DECLARE(tg_flags_update_lock, &tg_lck_grp);

static uint64_t tg_next_id = 0;
static uint32_t tg_size;
static uint32_t tg_machine_data_size;
static uint32_t perf_controller_thread_group_immediate_ipi;
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
int tg_set_by_bankvoucher;

static bool thread_group_retain_try(struct thread_group *tg);

static struct mpsc_daemon_queue thread_group_deallocate_queue;
static void thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq);

/*
 * Initialize thread groups at boot
 */
void
thread_group_init(void)
{
	// Get thread group structure extension from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
			tg_machine_data_size = 8;
		}
	}

	if (!PE_parse_boot_argn("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
		if (!PE_get_default("kern.perf_tg_no_dipi", &perf_controller_thread_group_immediate_ipi, sizeof(perf_controller_thread_group_immediate_ipi))) {
			perf_controller_thread_group_immediate_ipi = 0;
		}
	}

	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
			tg_set_by_bankvoucher = 1;
		}
	}

	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
	if (tg_size % TG_MACHINE_DATA_ALIGN_SIZE) {
		tg_size += TG_MACHINE_DATA_ALIGN_SIZE - (tg_size % TG_MACHINE_DATA_ALIGN_SIZE);
	}
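	/*
	 * Worked example of the rounding above: with TG_MACHINE_DATA_ALIGN_SIZE
	 * of 16, a raw tg_size of 104 gives 104 % 16 == 8, so 16 - 8 == 8 bytes
	 * of padding are added, producing an aligned tg_size of 112. (Values are
	 * illustrative; the real sizes depend on the struct layout.)
	 */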
	tg_machine_data_size = tg_size - sizeof(struct thread_group);
	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
	assert(offsetof(struct thread_group, tg_machine_data) % TG_MACHINE_DATA_ALIGN_SIZE == 0);
	tg_zone = zone_create("thread_groups", tg_size, ZC_ALIGNMENT_REQUIRED);

	queue_head_init(tg_queue);
	tg_system = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_system, "system");
	tg_background = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_background, "background");
	lck_mtx_lock(&tg_lock);
	tg_next_id++;  // Skip ID 2, which used to be the "adaptive" group. (It was never used.)
	lck_mtx_unlock(&tg_lock);
	tg_vm = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_vm, "VM");
	tg_io_storage = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_io_storage, "io storage");
	tg_perf_controller = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
	thread_group_set_name(tg_perf_controller, "perf_controller");

	/*
	 * The thread group deallocation queue must be a thread call based queue
	 * because it is woken up from contexts where the thread lock is held. The
	 * only way to perform wakeups safely in those contexts is to wake up a
	 * thread call, which is guaranteed to be on a different waitq and would
	 * not hash onto the same global waitq which might be currently locked.
	 */
	mpsc_daemon_queue_init_with_thread_call(&thread_group_deallocate_queue,
	    thread_group_deallocate_queue_invoke, THREAD_CALL_PRIORITY_KERNEL,
	    MPSC_DAEMON_INIT_NONE);
}
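
/*
 * Illustrative boot-args usage for the knobs parsed above (the values are
 * hypothetical; the argument names are the ones handled in
 * thread_group_init()):
 *
 *   kern.thread_group_extra_bytes=32        <- larger per-group machine data
 *   kern.perf_tg_no_dipi=1                  <- perf_controller group requests
 *                                              immediate (non-deferred) IPIs
 *   kern.thread_group_set_by_bankvoucher=0  <- disable TG adoption via
 *                                              bank vouchers
 */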

#if CONFIG_SCHED_CLUTCH
/*
 * sched_clutch_for_thread
 *
 * The routine provides a back linkage from the thread to the
 * sched_clutch it belongs to. This relationship is based on the
 * thread group membership of the thread. Since that membership is
 * changed from the thread context with the thread lock held, this
 * linkage should be looked at only with the thread lock held or
 * when the thread cannot be running (e.g. the thread is in the
 * runq and is being removed as part of thread_select()).
 */
sched_clutch_t
sched_clutch_for_thread(thread_t thread)
{
	assert(thread->thread_group != NULL);
	return &(thread->thread_group->tg_sched_clutch);
}

sched_clutch_t
sched_clutch_for_thread_group(struct thread_group *thread_group)
{
	return &(thread_group->tg_sched_clutch);
}

#endif /* CONFIG_SCHED_CLUTCH */

uint64_t
thread_group_id(struct thread_group *tg)
{
	return (tg == NULL) ? 0 : tg->tg_id;
}

#if CONFIG_PREADOPT_TG
static inline bool
thread_get_reevaluate_tg_hierarchy_locked(thread_t t)
{
	return t->sched_flags & TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}

static inline void
thread_set_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags |= TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}

static inline void
thread_clear_reevaluate_tg_hierarchy_locked(thread_t t)
{
	t->sched_flags &= ~TH_SFLAG_REEVALUTE_TG_HIERARCHY_LATER;
}
#endif

/*
 * Use a mutex to protect all thread group flag updates.
 * The lock should not see heavy contention since these flag updates should
 * be infrequent. If this lock does develop contention issues, it should be
 * changed to a per-thread-group lock.
 *
 * The lock protects the flags field in the thread_group structure. It is also
 * held while doing callouts to CLPC to reflect these flag changes.
 */

void
thread_group_flags_update_lock(void)
{
	lck_mtx_lock(&tg_flags_update_lock);
}

void
thread_group_flags_update_unlock(void)
{
	lck_mtx_unlock(&tg_flags_update_lock);
}

/*
 * Inform platform code about already existing thread groups,
 * or ask it to free state for all thread groups.
 */
void
thread_group_resync(boolean_t create)
{
	struct thread_group *tg;

	thread_group_flags_update_lock();
	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (create) {
			machine_thread_group_init(tg);
		} else {
			machine_thread_group_deinit(tg);
		}
	}
	lck_mtx_unlock(&tg_lock);
	thread_group_flags_update_unlock();
}

/*
 * Create a new thread group and take a reference on it.
 */
struct thread_group *
thread_group_create_and_retain(uint32_t flags)
{
	struct thread_group *tg;

	tg = zalloc_flags(tg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	assert((uintptr_t)tg % TG_MACHINE_DATA_ALIGN_SIZE == 0);

	tg->tg_flags = flags;

#if CONFIG_SCHED_CLUTCH
	/*
	 * The clutch scheduler maintains a bunch of runqs per thread group. For
	 * each thread group it maintains a sched_clutch structure. The lifetime
	 * of that structure is tied directly to the lifetime of the thread group.
	 */
	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);

#endif /* CONFIG_SCHED_CLUTCH */

	lck_mtx_lock(&tg_lock);
	tg->tg_id = tg_next_id++;
	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
	os_ref_init(&tg->tg_refcount, NULL);
	tg_count++;
	enqueue_tail(&tg_queue, &tg->tg_queue_chain);

	// call machine layer init before this thread group becomes visible
	machine_thread_group_init(tg);
	lck_mtx_unlock(&tg_lock);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), thread_group_id(tg), thread_group_get_flags(tg));
	if (flags) {
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS), thread_group_id(tg), thread_group_get_flags(tg), 0);
	}

	return tg;
}
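
/*
 * Typical lifecycle of a dynamically created group (illustrative sketch; the
 * caller and name are hypothetical, the APIs are the ones defined in this
 * file):
 *
 *   struct thread_group *tg = thread_group_create_and_retain(THREAD_GROUP_FLAGS_DEFAULT);
 *   thread_group_set_name(tg, "example");
 *   ...
 *   thread_group_release(tg);  // may deinit machine state and free the group
 */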

/*
 * Point a newly created thread at its home thread group.
 */
void
thread_group_init_thread(thread_t t, task_t task)
{
	struct thread_group *tg = task_coalition_get_thread_group(task);
	t->thread_group = tg;
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    THREAD_GROUP_INVALID, thread_group_id(tg), (uintptr_t)thread_tid(t));
}

/*
 * Set the thread group name.
 */
void
thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
{
	if (name == NULL) {
		return;
	}
	if (!thread_group_retain_try(tg)) {
		return;
	}
	if (name[0] != '\0') {
		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
#if defined(__LP64__)
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
		    );
#else /* defined(__LP64__) */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
		    );
#endif /* defined(__LP64__) */
	}
	thread_group_release(tg);
}

void
thread_group_set_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

/*
 * Return true if flags are valid, false otherwise.
 * Some flags are mutually exclusive.
 */
boolean_t
thread_group_valid_flags(uint32_t flags)
{
	const uint32_t sflags = flags & ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
	const uint32_t eflags = flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;

	if ((sflags & THREAD_GROUP_FLAGS_SHARED) != sflags) {
		return false;
	}

	if ((eflags & THREAD_GROUP_FLAGS_EXCLUSIVE) != eflags) {
		return false;
	}

	/* Only one of the exclusive flags may be set. */
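	/*
	 * ((eflags - 1) & eflags) clears the lowest set bit, so a nonzero
	 * result means at least two exclusive bits are set. For example,
	 * eflags == 0b0110 gives (0b0101 & 0b0110) == 0b0100 != 0 -> invalid.
	 */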
	if (((eflags - 1) & eflags) != 0) {
		return false;
	}

	return true;
}

void
thread_group_clear_flags(struct thread_group *tg, uint32_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

/*
 * Set thread group flags and perform related actions.
 * The tg_flags_update_lock should be held.
 * Currently supported flags are listed in the
 * THREAD_GROUP_FLAGS_EXCLUSIVE and THREAD_GROUP_FLAGS_SHARED masks.
 */
void
thread_group_set_flags_locked(struct thread_group *tg, uint32_t flags)
{
	if (!thread_group_valid_flags(flags)) {
		panic("thread_group_set_flags: Invalid flags %u", flags);
	}

	/*
	 * Disallow any exclusive flags from being set after creation, with the
	 * exception of moving from default to application.
	 */
	if ((flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) &&
	    !((flags & THREAD_GROUP_FLAGS_APPLICATION) &&
	    (tg->tg_flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) ==
	    THREAD_GROUP_FLAGS_DEFAULT)) {
		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
	}
	if ((tg->tg_flags & flags) == flags) {
		return;
	}

	if (tg == tg_system) {
		/*
		 * The system TG is used for the kernel and launchd. It is also used
		 * for processes which are getting spawned and do not have a home
		 * TG yet (see task_coalition_get_thread_group()). Make sure the
		 * policies for those processes do not update the flags for the
		 * system TG. The flags for this thread group should only be set
		 * at creation via thread_group_create_and_retain().
		 */
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags |= flags;

	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}

/*
 * Clear thread group flags and perform related actions.
 * The tg_flags_update_lock should be held.
 * Currently supported flags are listed in the
 * THREAD_GROUP_FLAGS_EXCLUSIVE and THREAD_GROUP_FLAGS_SHARED masks.
 */
void
thread_group_clear_flags_locked(struct thread_group *tg, uint32_t flags)
{
	if (!thread_group_valid_flags(flags)) {
		panic("thread_group_clear_flags: Invalid flags %u", flags);
	}

	/* Disallow any exclusive flags from being cleared */
	if (flags & THREAD_GROUP_EXCLUSIVE_FLAGS_MASK) {
		flags &= ~THREAD_GROUP_EXCLUSIVE_FLAGS_MASK;
	}
	if ((tg->tg_flags & flags) == 0) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags &= ~flags;
	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}

/*
 * Find the thread group with the specified name and take a new reference on it.
 */
struct thread_group *
thread_group_find_by_name_and_retain(char *name)
{
	struct thread_group *result = NULL;

	if (name == NULL) {
		return NULL;
	}

	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_system);
	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_background);
	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_perf_controller);
	}

	struct thread_group *tg;
	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
		    thread_group_retain_try(tg)) {
			result = tg;
			break;
		}
	}
	lck_mtx_unlock(&tg_lock);
	return result;
}

/*
 * Find the thread group with the specified ID and take a new reference on it.
 */
struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)
{
	struct thread_group *tg = NULL;
	struct thread_group *result = NULL;

	switch (id) {
	case THREAD_GROUP_SYSTEM:
		result = tg_system;
		thread_group_retain(tg_system);
		break;
	case THREAD_GROUP_BACKGROUND:
		result = tg_background;
		thread_group_retain(tg_background);
		break;
	case THREAD_GROUP_VM:
		result = tg_vm;
		thread_group_retain(tg_vm);
		break;
	case THREAD_GROUP_IO_STORAGE:
		result = tg_io_storage;
		thread_group_retain(tg_io_storage);
		break;
	case THREAD_GROUP_PERF_CONTROLLER:
		result = tg_perf_controller;
		thread_group_retain(tg_perf_controller);
		break;
	default:
		lck_mtx_lock(&tg_lock);
		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
			if (tg->tg_id == id && thread_group_retain_try(tg)) {
				result = tg;
				break;
			}
		}
		lck_mtx_unlock(&tg_lock);
	}
	return result;
}

/*
 * Add a new reference to the specified thread group.
 */
struct thread_group *
thread_group_retain(struct thread_group *tg)
{
	os_ref_retain(&tg->tg_refcount);
	return tg;
}

/*
 * Similar to thread_group_retain, but fails for thread groups with a
 * zero reference count. Returns true if retained successfully.
 */
static bool
thread_group_retain_try(struct thread_group *tg)
{
	return os_ref_retain_try(&tg->tg_refcount);
}

static void
thread_group_deallocate_complete(struct thread_group *tg)
{
	lck_mtx_lock(&tg_lock);
	tg_count--;
	remqueue(&tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);
	static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 3), "thread group name is too short");
	static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint64_t*)(void*)&tg->tg_name[0],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)],
	    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t) * 2]
	    );
#else /* defined(__LP64__) */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
	    tg->tg_id,
	    *(uint32_t*)(void*)&tg->tg_name[0],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)],
	    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t) * 2]
	    );
#endif /* defined(__LP64__) */
	machine_thread_group_deinit(tg);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
	zfree(tg_zone, tg);
}

/*
 * Drop a reference to the specified thread group.
 */
void
thread_group_release(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		thread_group_deallocate_complete(tg);
	}
}

void
thread_group_release_live(struct thread_group *tg)
{
	os_ref_release_live(&tg->tg_refcount);
}

static void
thread_group_deallocate_queue_invoke(mpsc_queue_chain_t e, __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &thread_group_deallocate_queue);
	struct thread_group *tg = mpsc_queue_element(e, struct thread_group, tg_destroy_link);

	thread_group_deallocate_complete(tg);
}

void
thread_group_deallocate_safe(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		mpsc_daemon_enqueue(&thread_group_deallocate_queue, &tg->tg_destroy_link,
		    MPSC_QUEUE_NONE);
	}
}
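
/*
 * Reference-counting sketch (illustrative; "id" and the calling context are
 * hypothetical):
 *
 *   struct thread_group *tg = thread_group_find_by_id_and_retain(id);
 *   if (tg != NULL) {
 *       ...
 *       thread_group_release(tg);            // fine in sleepable contexts
 *       // or, when holding the thread lock or other scheduler spinlocks:
 *       // thread_group_deallocate_safe(tg); // defers the free to the
 *       //                                   // thread-call daemon queue
 *   }
 */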

/*
 * Get thread's current thread group
 */
inline struct thread_group *
thread_group_get(thread_t t)
{
	return t->thread_group;
}

struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(get_threadtask(t));
}

/*
 * The thread group is resolved according to a hierarchy:
 *
 * 1) work interval specified group (explicit API)
 * 2) Auto-join thread group (wakeup tracking for special work intervals)
 * 3) bank voucher carried group (implicitly set)
 * 4) Preadopt thread group (if any)
 * 5) coalition default thread group (ambient)
 *
 * Returns true if the thread's thread group needs to be changed; the resolved
 * TG is returned through the in-out param. See also
 * thread_mark_thread_group_hierarchy_resolved and
 * thread_set_resolved_thread_group.
 *
 * Caller should hold the thread lock. Interrupts are disabled. The thread
 * doesn't have to be self.
 */
static bool
thread_compute_resolved_thread_group(thread_t t, struct thread_group **resolved_tg)
{
	struct thread_group *cur_tg, *tg;
	cur_tg = t->thread_group;

	tg = thread_group_get_home_group(t);

#if CONFIG_PREADOPT_TG
	if (t->preadopt_thread_group) {
		tg = t->preadopt_thread_group;
	}
#endif
	if (t->bank_thread_group) {
		tg = t->bank_thread_group;
	}

	if (t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) {
		if (t->auto_join_thread_group) {
			tg = t->auto_join_thread_group;
		}
	} else {
		if (t->work_interval_thread_group) {
			tg = t->work_interval_thread_group;
		}
	}

	*resolved_tg = tg;
	return tg != cur_tg;
}
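
/*
 * Worked example of the resolution order above: for a thread with both
 * preadopt_thread_group and bank_thread_group set, no auto-join group, and
 * no work interval group, the bank voucher group wins, since it is applied
 * after (and therefore overrides) the preadoption group, which in turn
 * overrides the home group.
 */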

#if CONFIG_PREADOPT_TG

/*
 * This function is always called after the hierarchy has been resolved. The
 * caller holds the thread lock.
 */
static inline void
thread_assert_has_valid_thread_group(thread_t t)
{
	__assert_only struct thread_group *home_tg = thread_group_get_home_group(t);

	assert(thread_get_reevaluate_tg_hierarchy_locked(t) == false);

	__assert_only struct thread_group *resolved_tg;
	assert(thread_compute_resolved_thread_group(t, &resolved_tg) == false);

	assert((t->thread_group == home_tg) ||
	    (t->thread_group == t->preadopt_thread_group) ||
	    (t->thread_group == t->bank_thread_group) ||
	    (t->thread_group == t->auto_join_thread_group) ||
	    (t->thread_group == t->work_interval_thread_group));
}
#endif

/*
 * This function is called when the thread group hierarchy on the thread_t is
 * resolved and t->thread_group is the result of the hierarchy resolution. Once
 * this has happened, there is state that needs to be cleaned up, which is
 * handled by this function.
 *
 * Prior to this call, we should have either
 * a) Resolved the hierarchy and discovered no change needed
 * b) Resolved the hierarchy and modified the t->thread_group
 */
static void
thread_mark_thread_group_hierarchy_resolved(thread_t __unused t)
{
#if CONFIG_PREADOPT_TG
	/*
	 * We have just reevaluated the thread's hierarchy so we don't need to do it
	 * again later.
	 */
	thread_clear_reevaluate_tg_hierarchy_locked(t);

	/*
	 * Clear the old_preadopt_thread_group field whose sole purpose was to make
	 * sure that t->thread_group didn't have a dangling pointer.
	 */
	thread_assert_has_valid_thread_group(t);

	if (t->old_preadopt_thread_group) {
		thread_group_deallocate_safe(t->old_preadopt_thread_group);
		t->old_preadopt_thread_group = NULL;
	}
#endif
}

/*
 * Called with the thread lock held, always called on self. This function
 * informs CLPC of the thread group change so that its going-on-core state
 * stays consistent.
 */
static void
thread_notify_thread_group_change_self(thread_t t, struct thread_group * __unused old_tg,
    struct thread_group * __unused new_tg)
{
	assert(current_thread() == t);
	assert(old_tg != new_tg);
	assert(t->thread_group == new_tg);

	uint64_t ctime = mach_approximate_time();
	uint64_t arg1, arg2;
	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
}

/*
 * Called on any thread with the thread lock held. Updates the thread_group
 * field on the thread with the resolved thread group and always makes the
 * necessary clutch scheduler callouts. If the thread group is being modified
 * on self, also makes the necessary CLPC callouts.
 */
static void
thread_set_resolved_thread_group(thread_t t, struct thread_group *old_tg,
    struct thread_group *resolved_tg, bool on_self)
{
	t->thread_group = resolved_tg;

	/* Thread is either running already or is runnable but not on a runqueue */
	assert((t->state & (TH_RUN | TH_IDLE)) == TH_RUN);
	thread_assert_runq_null(t);

	struct thread_group *home_tg = thread_group_get_home_group(t);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    thread_group_id(old_tg), thread_group_id(resolved_tg),
	    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

#if CONFIG_PREADOPT_TG
	if (resolved_tg == t->preadopt_thread_group) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
	}
#endif

#if CONFIG_SCHED_CLUTCH
	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
	sched_clutch_t new_clutch = (resolved_tg) ? &(resolved_tg->tg_sched_clutch) : NULL;
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
	}
#endif

	if (on_self) {
		assert(t == current_thread());
		thread_notify_thread_group_change_self(t, old_tg, resolved_tg);
	}

	thread_mark_thread_group_hierarchy_resolved(t);
}

/* Caller holds the thread lock. Always called on self. */
static void
thread_resolve_thread_group_hierarchy_self_locked(thread_t t, __unused bool clear_preadopt)
{
	assert(current_thread() == t);

#if CONFIG_PREADOPT_TG
	struct thread_group *preadopt_tg = NULL;
	if (clear_preadopt) {
		if (t->preadopt_thread_group) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    (uintptr_t)thread_tid(t), thread_group_id(t->preadopt_thread_group), 0, 0);

			preadopt_tg = t->preadopt_thread_group;
			t->preadopt_thread_group = NULL;
		}
	}
#endif

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);
	}

	/*
	 * Regardless of whether we modified the t->thread_group above or not, the
	 * hierarchy is now resolved.
	 */
	thread_mark_thread_group_hierarchy_resolved(t);

#if CONFIG_PREADOPT_TG
	if (preadopt_tg) {
		thread_group_deallocate_safe(preadopt_tg);
	}
#endif
}

/*
 * Caller holds the thread lock; never called on self, always called on a
 * thread not on a runqueue. This is called from sched_prim.c. The counterpart
 * for calling on self is thread_resolve_thread_group_hierarchy_self.
 */
#if CONFIG_PREADOPT_TG
void
thread_resolve_and_enforce_thread_group_hierarchy_if_needed(thread_t t)
{
	assert(t != current_thread());
	thread_assert_runq_null(t);

	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		struct thread_group *resolved_tg = NULL;

		bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
		if (needs_change) {
			struct thread_group *old_tg = t->thread_group;
			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);
		}

		/*
		 * Regardless of whether we modified the t->thread_group above or not,
		 * the hierarchy is now resolved.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
	}
}
#endif

#if CONFIG_PREADOPT_TG
/*
 * The thread being passed can be the current thread and it can also be another
 * thread which is running on another core. This function is called with spin
 * locks held (kq and wq lock) but the thread lock is not held by the caller.
 *
 * The thread always takes a +1 on the thread group and will release the
 * previous preadoption thread group's reference or stash it.
 */
void
thread_set_preadopt_thread_group(thread_t t, struct thread_group *tg)
{
	spl_t s = splsched();
	thread_lock(t);

	/*
	 * Assert that this is never called on WindowServer when it has already
	 * issued a block callout to CLPC.
	 *
	 * This should never happen because we don't ever call
	 * thread_set_preadopt_thread_group on a servicer after going out to
	 * userspace unless we are doing so to/after an unbind.
	 */
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	struct thread_group *old_tg = t->thread_group;
	struct thread_group *home_tg = thread_group_get_home_group(t);

	/*
	 * Since the preadoption thread group can disappear from under you, we need
	 * to make sure that the thread_group pointer is always pointing to valid
	 * memory.
	 *
	 * We run the risk of the thread group pointer pointing to dangling memory
	 * when the following happens:
	 *
	 * a) We update the preadopt_thread_group
	 * b) We resolve hierarchy and need to change the resolved_thread_group
	 * c) For some reason, we are not able to do so and we need to set the
	 * resolved thread group later.
	 */

	/* take the ref from the thread */
	struct thread_group *old_preadopt_tg = t->preadopt_thread_group;

	if (tg == NULL) {
		t->preadopt_thread_group = NULL;
		if (old_preadopt_tg != NULL) {
			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_CLEAR),
			    thread_tid(t), thread_group_id(old_preadopt_tg), 0, 0);
		}
	} else {
		t->preadopt_thread_group = thread_group_retain(tg);
	}

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);
	if (!needs_change) {
		/*
		 * Setting the preadoption thread group didn't change anything; simply
		 * mark the hierarchy as resolved and exit.
		 */
		thread_mark_thread_group_hierarchy_resolved(t);
		goto out;
	}

	if (t != current_thread()) {
		/*
		 * We're modifying the thread group of another thread, so we need to
		 * take action according to the state of the other thread.
		 *
		 * Try removing the thread from its runq, modify its TG and then
		 * reinsert it for reevaluation. If the thread isn't runnable (already
		 * running, started running concurrently, or in a waiting state), then
		 * mark a bit that will cause the thread to reevaluate its own
		 * hierarchy the next time it is being inserted into a runq.
		 */
		if (thread_run_queue_remove(t)) {
			/* Thread is runnable and we successfully removed it from the runq */
			thread_set_resolved_thread_group(t, old_tg, resolved_tg, false);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

			thread_run_queue_reinsert(t, SCHED_TAILQ);
		} else {
			/*
			 * The thread is not runnable or it is running already - let the
			 * thread reevaluate the next time it gets enqueued on a runq.
			 */
			thread_set_reevaluate_tg_hierarchy_locked(t);

			KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT_NEXTTIME),
			    thread_group_id(old_tg), thread_group_id(tg),
			    (uintptr_t)thread_tid(t), thread_group_id(home_tg));
		}
	} else {
		/* We're modifying the thread group on ourselves */
		thread_set_resolved_thread_group(t, old_tg, resolved_tg, true);

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_PREADOPT),
		    thread_group_id(old_tg), thread_group_id(tg),
		    thread_tid(t), thread_group_id(home_tg));
	}

out:
	if (thread_get_reevaluate_tg_hierarchy_locked(t)) {
		assert(t->thread_group == old_tg);
		/*
		 * We need to reevaluate the TG hierarchy later as a result of this
		 * thread_set_preadopt_thread_group operation. This means that the
		 * thread group on the thread was pointing to either the home thread
		 * group, the preadoption thread group we just replaced, or the old
		 * preadoption thread group stashed on the thread.
		 */
		assert(t->thread_group == home_tg ||
		    t->thread_group == old_preadopt_tg ||
		    t->old_preadopt_thread_group);

		if (t->thread_group == old_preadopt_tg) {
			/*
			 * t->thread_group is pointing to the preadopt thread group we just
			 * replaced. This means the hierarchy was resolved before this call.
			 * Assert that there was no old_preadopt_thread_group on the thread.
			 */
			assert(t->old_preadopt_thread_group == NULL);
			/*
			 * Since t->thread_group is still pointing to the old preadopt thread
			 * group, we need to keep it alive until we reevaluate the hierarchy
			 * next.
			 */
			t->old_preadopt_thread_group = old_tg; // transfer ref back to thread
		} else if (old_preadopt_tg != NULL) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	} else {
		/* We resolved the hierarchy just now */
		thread_assert_has_valid_thread_group(t);

		/*
		 * We don't need the old preadopt thread group that we stashed in our
		 * local variable, so drop it.
		 */
		if (old_preadopt_tg) {
			thread_group_deallocate_safe(old_preadopt_tg);
		}
	}
	thread_unlock(t);
	splx(s);
	return;
}

#endif

/*
 * thread_set_thread_group()
 *
 * The caller must guarantee the lifetime of the thread group for the life of
 * the call - this overrides the thread group without going through the
 * hierarchy resolution. This is for special thread groups like the VM and IO
 * thread groups only.
 */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	spl_t s = splsched();
	old_tg = t->thread_group;

	if (old_tg != tg) {
		thread_lock(t);

		assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
		t->thread_group = tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(tg),
		    (uintptr_t)thread_tid(t), thread_group_id(home_tg));

		thread_notify_thread_group_change_self(t, old_tg, tg);

		thread_unlock(t);
	}

	splx(s);
}

/* Called without the thread lock held, called on the current thread */
void
thread_group_set_bank(thread_t t, struct thread_group *tg)
{
	assert(current_thread() == t);
	/* boot arg disables groups in bank */
	if (tg_set_by_bankvoucher == FALSE) {
		return;
	}

	spl_t s = splsched();
	thread_lock(t);

	/* This is a borrowed reference from the current bank voucher */
	t->bank_thread_group = tg;

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}

#if CONFIG_SCHED_AUTO_JOIN
/*
 * thread_set_autojoin_thread_group_locked()
 *
 * Sets the thread group of a thread based on auto-join rules and reevaluates
 * the hierarchy.
 *
 * Preconditions:
 * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
 * - Thread must be locked by the caller already
 */
void
thread_set_autojoin_thread_group_locked(thread_t t, struct thread_group *tg)
{
	thread_assert_runq_null(t);

	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);
	t->auto_join_thread_group = tg;

	struct thread_group *resolved_tg = NULL;
	bool needs_change = thread_compute_resolved_thread_group(t, &resolved_tg);

	if (needs_change) {
		struct thread_group *old_tg = t->thread_group;
		struct thread_group *home_tg = thread_group_get_home_group(t);

		t->thread_group = resolved_tg;

		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    thread_group_id(old_tg), thread_group_id(resolved_tg),
		    thread_tid(t), thread_group_id(home_tg));
		/*
		 * If the thread group is being changed for the current thread, call out
		 * to CLPC to update the thread's information at that layer. This makes
		 * sure CLPC has consistent state when the current thread is going
		 * off-core.
		 *
		 * Note that we are passing in the PERFCONTROL_CALLOUT_WAKE_UNSAFE flag
		 * to CLPC here (as opposed to 0 in thread_notify_thread_group_change_self).
		 */
		if (t == current_thread()) {
			uint64_t ctime = mach_approximate_time();
			uint64_t arg1, arg2;
			machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
			machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
		}
	}

	thread_mark_thread_group_hierarchy_resolved(t);
}
#endif

/* Thread is not locked. Thread is self. */
void
thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg)
{
	assert(current_thread() == t);
	assert(!(t->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN));

	/*
	 * We have a work interval, so we don't need the preadoption thread group
	 * anymore (i.e. it shouldn't be available for us to jump back to it after
	 * the thread leaves the work interval).
	 */
	spl_t s = splsched();
	thread_lock(t);

	t->work_interval_thread_group = tg;
	assert((t->options & TH_OPT_IPC_TG_BLOCKED) == 0);

	thread_resolve_thread_group_hierarchy_self_locked(t, tg != NULL);

	thread_unlock(t);
	splx(s);
}

inline cluster_type_t
thread_group_recommendation(struct thread_group *tg)
{
	if (tg == NULL) {
		return CLUSTER_TYPE_SMP;
	} else {
		return tg->tg_recommendation;
	}
}

inline uint64_t
thread_group_get_id(struct thread_group *tg)
{
	return tg->tg_id;
}

uint32_t
thread_group_count(void)
{
	return tg_count;
}

/*
 * Can only be called while tg cannot be destroyed
 */
inline const char*
thread_group_get_name(struct thread_group *tg)
{
	return tg->tg_name;
}

inline void *
thread_group_get_machine_data(struct thread_group *tg)
{
	return &tg->tg_machine_data;
}

inline uint32_t
thread_group_machine_data_size(void)
{
	return tg_machine_data_size;
}
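
/*
 * CLPC only ever sees the tg_machine_data pointer; both sides recover the
 * owning group by subtracting the field offset (illustrative sketch):
 *
 *   void *md = thread_group_get_machine_data(tg);
 *   struct thread_group *owner =
 *       __container_of(md, struct thread_group, tg_machine_data);
 *   assert(owner == tg);
 */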

inline boolean_t
thread_group_uses_immediate_ipi(struct thread_group *tg)
{
	return thread_group_get_id(tg) == THREAD_GROUP_PERF_CONTROLLER && perf_controller_thread_group_immediate_ipi != 0;
}

kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
{
	struct thread_group *tg;
	int i = 0;
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
			return KERN_FAILURE;
		}
		callout(arg, i, tg);
		i++;
	}
	return KERN_SUCCESS;
}

void
thread_group_join_io_storage(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg);
}

void
thread_group_join_perf_controller(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg);
}

void
thread_group_vm_add(void)
{
	assert(tg_vm != NULL);
	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM));
}

uint32_t
thread_group_get_flags(struct thread_group *tg)
{
	return tg->tg_flags;
}

void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}
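
/*
 * A matching reader-side access (illustrative sketch; assumes a caller that
 * wants an explicit atomic load rather than the plain read used by
 * thread_group_recommendation() above):
 *
 *   cluster_type_t rec = os_atomic_load(&tg->tg_recommendation, relaxed);
 */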

#if CONFIG_SCHED_EDGE

int sched_edge_restrict_ut = 1;
int sched_edge_restrict_bg = 1;

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	/*
	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off. So it should
	 * never be recommending CLUSTER_TYPE_SMP for thread groups.
	 */
	assert(new_recommendation != CLUSTER_TYPE_SMP);
	/*
	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until the new CLPC
	 * routine is being called, fake out the call from the old CLPC interface.
	 */
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
	/*
	 * For all buckets higher than UT, apply the recommendation to the thread group bucket.
	 */
	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	/* For UT & BG QoS, set the recommendation only if they haven't been restricted via sysctls */
	if (!sched_edge_restrict_ut) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	if (!sched_edge_restrict_bg) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	sched_perfcontrol_preferred_cluster_options_t options = 0;
	if (new_recommendation == CLUSTER_TYPE_P) {
		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
	}
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}

void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
    uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_USER_INITIATED] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_USER_INITIATED] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
	};
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_set(uint32_t cluster_id, uint64_t preferred_bitmask, uint64_t migration_bitmask)
{
	assert(cluster_id < MAX_PSETS);
	assert((preferred_bitmask & migration_bitmask) == 0);
	processor_set_t pset = pset_array[cluster_id];
	pset->perfcontrol_cpu_preferred_bitmask = preferred_bitmask;
	pset->perfcontrol_cpu_migration_bitmask = migration_bitmask;
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_get(uint32_t cluster_id, uint64_t *preferred_bitmask, uint64_t *migration_bitmask)
{
	assert(cluster_id < MAX_PSETS);
	processor_set_t pset = pset_array[cluster_id];
	*preferred_bitmask = pset->perfcontrol_cpu_preferred_bitmask;
	*migration_bitmask = pset->perfcontrol_cpu_migration_bitmask;
}

#else /* CONFIG_SCHED_EDGE */

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
}

void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_set(__unused uint32_t cluster_id, __unused uint64_t preferred_bitmask, __unused uint64_t migration_bitmask)
{
}

void
sched_perfcontrol_edge_cpu_rotation_bitmasks_get(__unused uint32_t cluster_id, __unused uint64_t *preferred_bitmask, __unused uint64_t *migration_bitmask)
{
}

#endif /* CONFIG_SCHED_EDGE */

/*
 * Can only be called while tg cannot be destroyed.
 * Names can be up to THREAD_GROUP_MAXNAME long and are not necessarily null-terminated.
 */
const char*
sched_perfcontrol_thread_group_get_name(void *machine_data)
{
	struct thread_group *tg = __container_of(machine_data, struct thread_group, tg_machine_data);
	return thread_group_get_name(tg);
}

#endif /* CONFIG_THREAD_GROUPS */
1403