/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	processor.c: processor and processor_set manipulation routines.
 */

#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/processor.h>
#include <mach/processor_info.h>
#include <mach/vm_param.h>
#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/ipc_host.h>
#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/timer.h>
#if KPERF
#include <kperf/kperf.h>
#endif /* KPERF */
#include <ipc/ipc_port.h>

#include <security/mac_mach_internal.h>

#if defined(CONFIG_XNUPOST)

#include <tests/xnupost.h>

#endif /* CONFIG_XNUPOST */

/*
 * Exported interface
 */
#include <mach/mach_host_server.h>
#include <mach/processor_set_server.h>
#include <san/kcov.h>

/*
 * The first pset and the pset_node are created by default for all platforms.
 * Those typically represent the boot-cluster. For AMP platforms, all clusters
 * of the same type are part of the same pset_node. This allows for easier
 * CPU selection logic.
 */
struct processor_set    pset0;
struct pset_node        pset_node0;

#if __AMP__
struct pset_node        pset_node1;
pset_node_t             ecore_node;
pset_node_t             pcore_node;
#endif

LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);

LCK_GRP_DECLARE(pset_lck_grp, "pset");

queue_head_t            tasks;
queue_head_t            terminated_tasks;       /* To be used ONLY for stackshot. */
queue_head_t            corpse_tasks;
int                     tasks_count;
int                     terminated_tasks_count;
queue_head_t            threads;
queue_head_t            terminated_threads;
int                     threads_count;
int                     terminated_threads_count;
LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

processor_t             processor_list;
unsigned int            processor_count;
static processor_t      processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);

uint32_t                processor_avail_count;
uint32_t                processor_avail_count_user;
uint32_t                primary_processor_avail_count;
uint32_t                primary_processor_avail_count_user;

SECURITY_READ_ONLY_LATE(int)    master_cpu = 0;

struct processor        PERCPU_DATA(processor);
processor_t             processor_array[MAX_SCHED_CPUS] = { 0 };
processor_set_t         pset_array[MAX_PSETS] = { 0 };

static timer_call_func_t running_timer_funcs[] = {
	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
	[RUNNING_TIMER_PREEMPT] = thread_preempt_expire,
	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
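
/*
 * The enum-indexed handler table above, paired with a static_assert on the
 * array length, is a general pattern for keeping a dispatch table in sync
 * with its enum. A minimal sketch of the same pattern, with hypothetical
 * names that are not part of xnu, follows for reference.
 */
#if 0   /* Illustrative sketch only; never built. */
typedef void (*handler_fn_t)(void *arg);

enum { EVT_TICK, EVT_ALARM, EVT_MAX };

static void tick_handler(void *arg)  { (void)arg; /* ... */ }
static void alarm_handler(void *arg) { (void)arg; /* ... */ }

/* Designated initializers keep each slot visibly paired with its handler. */
static handler_fn_t handlers[] = {
	[EVT_TICK]  = tick_handler,
	[EVT_ALARM] = alarm_handler,
};

/* Fails to compile if an enum value is added without a matching handler. */
static_assert(sizeof(handlers) / sizeof(handlers[0]) == EVT_MAX,
    "missing event handler");
#endif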

#if defined(CONFIG_XNUPOST)
kern_return_t ipi_test(void);
extern void arm64_ipi_test(void);

kern_return_t
ipi_test()
{
#if __arm64__
	processor_t p;

	for (p = processor_list; p != NULL; p = p->processor_list) {
		thread_bind(p);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", p->cpu_id);
		arm64_ipi_test();
	}

	/* unbind thread from specific cpu */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */

int sched_enable_smt = 1;

void
processor_bootstrap(void)
{
	/* Initialize PSET node and PSET associated with boot cluster */
	pset_node0.psets = &pset0;
	pset_node0.pset_cluster_type = PSET_SMP;

#if __AMP__
	const ml_topology_info_t *topology_info = ml_get_topology_info();

	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset0.pset_id = boot_cluster->cluster_id;
	pset0.pset_cluster_id = boot_cluster->cluster_id;
	if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
		pset0.pset_cluster_type      = PSET_AMP_E;
		pset_node0.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node1;
	} else {
		pset0.pset_cluster_type      = PSET_AMP_P;
		pset_node0.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node1;
	}

	/* Link pset_node1 to pset_node0 */
	pset_node0.node_list = &pset_node1;
#endif

	pset_init(&pset0, &pset_node0);
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);

	processor_init(master_processor, master_cpu, &pset0);
}

/*
 *	Initialize the given processor for the cpu
 *	indicated by cpu_id, and assign to the
 *	specified processor set.
 */
void
processor_init(
	processor_t            processor,
	int                    cpu_id,
	processor_set_t        pset)
{
	spl_t           s;

	assert(cpu_id < MAX_SCHED_CPUS);
	processor->cpu_id = cpu_id;

	if (processor != master_processor) {
		/* Scheduler state for master_processor initialized in sched_init() */
		SCHED(processor_init)(processor);
		smr_cpu_init(processor);
	}

	processor->state = PROCESSOR_OFF_LINE;
	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
	processor->processor_set = pset;
	processor_state_update_idle(processor);
	processor->starting_pri = MINPRI;
	processor->quantum_end = UINT64_MAX;
	processor->deadline = UINT64_MAX;
	processor->first_timeslice = FALSE;
	processor->processor_offlined = false;
	processor->processor_primary = processor; /* no SMT relationship known at this point */
	processor->processor_secondary = NULL;
	processor->is_SMT = false;
	processor->is_recommended = true;
	processor->processor_self = IP_NULL;
	processor->processor_list = NULL;
	processor->must_idle = false;
	processor->next_idle_short = false;
	processor->last_startup_reason = REASON_SYSTEM;
	processor->last_shutdown_reason = REASON_NONE;
	processor->shutdown_temporary = false;
	processor->shutdown_locked = false;
	processor->last_recommend_reason = REASON_SYSTEM;
	processor->last_derecommend_reason = REASON_NONE;
	processor->running_timers_active = false;
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		timer_call_setup(&processor->running_timers[i],
		    running_timer_funcs[i], processor);
		running_timer_clear(processor, i);
	}
	recount_processor_init(processor);
	simple_lock_init(&processor->start_state_lock, 0);

	s = splsched();
	pset_lock(pset);
	bit_set(pset->cpu_bitmask, cpu_id);
	bit_set(pset->recommended_bitmask, cpu_id);
	bit_set(pset->primary_map, cpu_id);
	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
	if (pset->cpu_set_count++ == 0) {
		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
	} else {
		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
	}
	pset_unlock(pset);
	splx(s);

	simple_lock(&processor_list_lock, LCK_GRP_NULL);
	if (processor_list == NULL) {
		processor_list = processor;
	} else {
		processor_list_tail->processor_list = processor;
	}
	processor_list_tail = processor;
	processor_count++;
	simple_unlock(&processor_list_lock);
	processor_array[cpu_id] = processor;
}

bool system_is_SMT = false;

void
processor_set_primary(
	processor_t             processor,
	processor_t             primary)
{
	assert(processor->processor_primary == primary || processor->processor_primary == processor);
	/* Re-adjust the primary pointer for this (possibly) secondary processor */
	processor->processor_primary = primary;

	assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
	if (primary != processor) {
		/*
		 * Link primary to secondary; this assumes a 2-way SMT model.
		 * We'll need to move to a queue if any future architecture
		 * requires otherwise.
		 */
		assert(processor->processor_secondary == NULL);
		primary->processor_secondary = processor;
		/* Mark both processors as SMT siblings */
		primary->is_SMT = TRUE;
		processor->is_SMT = TRUE;

		if (!system_is_SMT) {
			system_is_SMT = true;
			sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
		}

		processor_set_t pset = processor->processor_set;
		spl_t s = splsched();
		pset_lock(pset);
		if (!pset->is_SMT) {
			pset->is_SMT = true;
		}
		bit_clear(pset->primary_map, processor->cpu_id);
		pset_unlock(pset);
		splx(s);
	}
}

processor_set_t
processor_pset(
	processor_t     processor)
{
	return processor->processor_set;
}

#if CONFIG_SCHED_EDGE

cluster_type_t
pset_type_for_id(uint32_t cluster_id)
{
	return pset_array[cluster_id]->pset_type;
}

/*
 * Processor foreign threads
 *
 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
 * if it's of a different type than its preferred cluster type (E/P). The bitmap should
 * be updated every time a new thread is assigned to run on a processor. Cluster shared
 * resource intensive threads are also not counted as foreign threads since these
 * threads should not be rebalanced when running on non-preferred clusters.
 *
 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
 * for rebalancing.
 */
static void
processor_state_update_running_foreign(processor_t processor, thread_t thread)
{
	cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
	cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));

	boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
	boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
	if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
		bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
	} else {
		bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
	}
}
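
/*
 * A hedged sketch of a consumer of cpu_running_foreign: a rebalancing pass
 * can visit only the CPUs whose bit is set instead of scanning every CPU in
 * the pset. This assumes the lsb_first()/lsb_next() bitmap iteration helpers
 * from <kern/bits.h>; it is not the actual Edge rebalancing code.
 */
#if 0   /* Illustrative sketch only; never built. */
static void
sketch_walk_foreign_cpus(processor_set_t pset)
{
	/* Caller is assumed to hold the pset lock so the bitmap is stable. */
	uint64_t map = pset->cpu_running_foreign;

	for (int cpu_id = lsb_first(map); cpu_id >= 0;
	    cpu_id = lsb_next(map, cpu_id)) {
		processor_t processor = processor_array[cpu_id];
		(void)processor;
		/* ... consider rebalancing the thread running here ... */
	}
}
#endif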

/*
 * Cluster shared resource intensive threads
 *
 * With the Edge scheduler, each pset maintains a bitmap of processors running
 * threads that are shared resource intensive. This per-thread property is set
 * by the performance controller or explicitly via dispatch SPIs. The bitmap
 * allows the Edge scheduler to calculate the cluster shared resource load on
 * any given cluster and load balance intensive threads accordingly.
 */
static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
{
	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	} else {
		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	}
	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
	} else {
		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
	}
}

#endif /* CONFIG_SCHED_EDGE */

void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
	processor->current_is_NO_SMT = false;
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
	sched_update_pset_load_average(processor->processor_set, 0);
}

void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
	processor->current_is_NO_SMT = thread_no_smt(thread);
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		sched_update_pset_load_average(processor->processor_set, 0);
	}
}

void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
{
	processor->current_pri = pri;
	processor->current_sfi_class = sfi_class;
	processor->current_recommended_pset_type = pset_type;
	processor->current_perfctl_class = perfctl_class;
	processor->current_urgency = urgency;
#if CONFIG_SCHED_EDGE
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
}

pset_node_t
pset_node_root(void)
{
	return &pset_node0;
}

LCK_GRP_DECLARE(pset_create_grp, "pset_create");
LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);

processor_set_t
pset_create(
	pset_node_t node,
	pset_cluster_type_t pset_type,
	uint32_t pset_cluster_id,
	int      pset_id)
{
	/* some schedulers do not support multiple psets */
	if (SCHED(multiple_psets_enabled) == FALSE) {
		return processor_pset(master_processor);
	}

	processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);

	if (pset != PROCESSOR_SET_NULL) {
		pset->pset_cluster_type = pset_type;
		pset->pset_cluster_id = pset_cluster_id;
		pset->pset_id = pset_id;
		pset_init(pset, node);

		lck_spin_lock(&pset_node_lock);

		prev = &node->psets;
		while (*prev != PROCESSOR_SET_NULL) {
			prev = &(*prev)->pset_list;
		}

		*prev = pset;

		lck_spin_unlock(&pset_node_lock);
	}

	return pset;
}
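
/*
 * pset_create() appends to the node's singly linked pset list by walking a
 * pointer-to-pointer, which makes the empty and non-empty cases identical:
 * there is no special case for the list head. The idiom in isolation, with
 * hypothetical types that are not part of xnu:
 */
#if 0   /* Illustrative sketch only; never built. */
struct snode {
	struct snode *next;
};

static void
snode_append(struct snode **head, struct snode *elem)
{
	struct snode **prev = head;

	while (*prev != NULL) {
		prev = &(*prev)->next;  /* advance to the final link */
	}
	elem->next = NULL;
	*prev = elem;   /* writes either *head or the last node's next field */
}
#endif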

/*
 *	Find processor set with specified cluster_id.
 *	Returns default_pset if not found.
 */
processor_set_t
pset_find(
	uint32_t cluster_id,
	processor_set_t default_pset)
{
	lck_spin_lock(&pset_node_lock);
	pset_node_t node = &pset_node0;
	processor_set_t pset = NULL;

	do {
		pset = node->psets;
		while (pset != NULL) {
			if (pset->pset_cluster_id == cluster_id) {
				break;
			}
			pset = pset->pset_list;
		}
	} while (pset == NULL && (node = node->node_list) != NULL);
	lck_spin_unlock(&pset_node_lock);
	if (pset == NULL) {
		return default_pset;
	}
	return pset;
}

/*
 *	Initialize the given processor_set structure.
 */
void
pset_init(
	processor_set_t         pset,
	pset_node_t             node)
{
	pset->online_processor_count = 0;
	pset->load_average = 0;
	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
	pset->cpu_set_low = pset->cpu_set_hi = 0;
	pset->cpu_set_count = 0;
	pset->last_chosen = -1;
	pset->cpu_bitmask = 0;
	pset->recommended_bitmask = 0;
	pset->primary_map = 0;
	pset->realtime_map = 0;
	pset->cpu_available_map = 0;

	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
		pset->cpu_state_map[i] = 0;
	}
	pset->pending_AST_URGENT_cpu_mask = 0;
	pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	pset->pending_deferred_AST_cpu_mask = 0;
#endif
	pset->pending_spill_cpu_mask = 0;
	pset->rt_pending_spill_cpu_mask = 0;
	pset_lock_init(pset);
	pset->pset_self = IP_NULL;
	pset->pset_name_self = IP_NULL;
	pset->pset_list = PROCESSOR_SET_NULL;
	pset->is_SMT = false;
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
	pset->cpu_running_foreign = 0;
	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
	}
#endif /* CONFIG_SCHED_EDGE */

	/*
	 * No initial preferences or forced migrations, so use the least numbered
	 * available idle core when picking amongst idle cores in a cluster.
	 */
	pset->perfcontrol_cpu_preferred_bitmask = 0;
	pset->perfcontrol_cpu_migration_bitmask = 0;
	pset->cpu_preferred_last_chosen = -1;

	pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;

	if (pset != &pset0) {
		/*
		 * Scheduler runqueue initialization for non-boot psets.
		 * This initialization for pset0 happens in sched_init().
		 */
		SCHED(pset_init)(pset);
		SCHED(rt_init)(pset);
	}

	/*
	 * Because the pset_node_lock is not taken by every client of the pset_map,
	 * we need to make sure that the initialized pset contents are visible to any
	 * client that loads a non-NULL value from pset_array.
	 */
	os_atomic_store(&pset_array[pset->pset_id], pset, release);

	lck_spin_lock(&pset_node_lock);
	bit_set(node->pset_map, pset->pset_id);
	pset->node = node;
	lck_spin_unlock(&pset_node_lock);
}
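
/*
 * The release store into pset_array above pairs with an acquire load on the
 * consumer side: a reader that observes a non-NULL pointer is then guaranteed
 * to see every pset field written before publication. A hedged sketch of a
 * matching consumer (hypothetical function, not the actual scheduler code):
 */
#if 0   /* Illustrative sketch only; never built. */
static processor_set_t
sketch_pset_lookup_published(int pset_id)
{
	processor_set_t pset = os_atomic_load(&pset_array[pset_id], acquire);

	if (pset != PROCESSOR_SET_NULL) {
		/* Safe to dereference: initialization happened-before here. */
	}
	return pset;
}
#endif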

kern_return_t
processor_info_count(
	processor_flavor_t              flavor,
	mach_msg_type_number_t  *count)
{
	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
		*count = PROCESSOR_BASIC_INFO_COUNT;
		break;

	case PROCESSOR_CPU_LOAD_INFO:
		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		break;

	default:
		return cpu_info_count(flavor, count);
	}

	return KERN_SUCCESS;
}

void
processor_cpu_load_info(processor_t processor,
    natural_t ticks[static CPU_STATE_MAX])
{
	struct recount_usage usage = { 0 };
	uint64_t idle_time = 0;
	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);

	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_metrics[RCT_LVL_USER].rm_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_SYSTEM] += (uint32_t)(
		recount_usage_system_time_mach(&usage) / hz_tick_interval);
	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
}

kern_return_t
processor_info(
	processor_t     processor,
	processor_flavor_t              flavor,
	host_t                                  *host,
	processor_info_t                info,
	mach_msg_type_number_t  *count)
{
	int     cpu_id, state;
	kern_return_t   result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t          basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t       cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}

void
processor_wait_for_start(processor_t processor)
{
	spl_t s = splsched();
	simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	while (processor->state == PROCESSOR_START) {
		assert_wait_timeout((event_t)&processor->state, THREAD_UNINT, 1000, 1000 * NSEC_PER_USEC); /* 1 second */
		simple_unlock(&processor->start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("%s>cpu %d failed to start\n", __FUNCTION__, processor->cpu_id);
		}

		s = splsched();
		simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	}
	simple_unlock(&processor->start_state_lock);
	splx(s);
}
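
/*
 * processor_wait_for_start() sleeps on the address of processor->state, so
 * whichever path moves the processor out of PROCESSOR_START is expected to
 * issue the matching wakeup under the same start_state_lock, preventing a
 * state change from racing the assert_wait. A hedged sketch of that wakeup
 * side (hypothetical function; the real code lives in the startup path):
 */
#if 0   /* Illustrative sketch only; never built. */
static void
sketch_signal_processor_started(processor_t processor)
{
	spl_t s = splsched();
	simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	/* ... processor->state has already left PROCESSOR_START ... */
	thread_wakeup((event_t)&processor->state);
	simple_unlock(&processor->start_state_lock);
	splx(s);
}
#endif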

LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);

static kern_return_t
processor_startup(
	processor_t                     processor,
	processor_reason_t              reason,
	uint32_t                        flags)
{
	processor_set_t         pset;
	thread_t                        thread;
	kern_return_t           result;
	spl_t                           s;

	if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((flags & (LOCK_STATE | UNLOCK_STATE)) && (reason != REASON_SYSTEM)) {
		return KERN_INVALID_ARGUMENT;
	}

	lck_mtx_lock(&processor_updown_lock);

	if (processor == master_processor) {
		processor_t             prev;

		processor->last_startup_reason = reason;

		ml_cpu_power_enable(processor->cpu_id);

		prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		result = cpu_start(processor->cpu_id);

		thread_bind(prev);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}

	bool scheduler_disable = false;

	if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
		if (cpu_can_exit(processor->cpu_id)) {
			lck_mtx_unlock(&processor_updown_lock);
			return KERN_SUCCESS;
		}
		/*
		 * This secondary SMT processor must start in order to service interrupts,
		 * so instead it will be disabled at the scheduler level.
		 */
		scheduler_disable = true;
	}

	s = splsched();
	pset = processor->processor_set;
	pset_lock(pset);
	if (flags & LOCK_STATE) {
		processor->shutdown_locked = true;
	} else if (flags & UNLOCK_STATE) {
		processor->shutdown_locked = false;
	}

	if (processor->state == PROCESSOR_START) {
		pset_unlock(pset);
		splx(s);

		processor_wait_for_start(processor);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_SUCCESS;
	}

	if ((processor->state != PROCESSOR_OFF_LINE) || ((flags & SHUTDOWN_TEMPORARY) && !processor->shutdown_temporary)) {
		pset_unlock(pset);
		splx(s);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_FAILURE;
	}

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	processor->last_startup_reason = reason;
	pset_unlock(pset);
	splx(s);

	/*
	 *	Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		result = idle_thread_create(processor);
		if (result != KERN_SUCCESS) {
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}
	}

	/*
	 *	If there is no active thread, the processor
	 *	has never been started.  Create a dedicated
	 *	start up thread.
	 */
	if (processor->active_thread == THREAD_NULL &&
	    processor->startup_thread == THREAD_NULL) {
		result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
		if (result != KERN_SUCCESS) {
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}

		s = splsched();
		thread_lock(thread);
		thread->bound_processor = processor;
		processor->startup_thread = thread;
		thread->state = TH_RUN;
		thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
		thread_unlock(thread);
		splx(s);

		thread_deallocate(thread);
	}

	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
	result = cpu_start(processor->cpu_id);
#if defined (__arm__) || defined (__arm64__)
	assert(result == KERN_SUCCESS);
#else
	if (result != KERN_SUCCESS) {
		s = splsched();
		pset_lock(pset);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		pset_unlock(pset);
		splx(s);
		ml_cpu_end_state_transition(processor->cpu_id);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}
#endif
	if (scheduler_disable) {
		assert(processor->processor_primary != processor);
		sched_processor_enable(processor, FALSE);
	}

	if (flags & WAIT_FOR_START) {
		processor_wait_for_start(processor);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

#if CONFIG_KCOV
	kcov_start_cpu(processor->cpu_id);
#endif

	lck_mtx_unlock(&processor_updown_lock);
	return KERN_SUCCESS;
}

kern_return_t
processor_exit_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
{
	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
#ifdef RHODES_CLUSTER_POWERDOWN_WORKAROUND
		/*
		 * Must allow CLPC to finish powering down the whole cluster,
		 * or IOCPUSleepKernel() will fail to restart the offline cpus.
		 */
		if (reason != REASON_CLPC_SYSTEM) {
			return KERN_FAILURE;
		}
#else
		return KERN_FAILURE;
#endif
	}

	if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
		return sched_processor_enable(processor, FALSE);
	} else if ((reason == REASON_SYSTEM) || cpu_can_exit(processor->cpu_id)) {
		return processor_shutdown(processor, reason, flags);
	}

	return KERN_INVALID_ARGUMENT;
}

kern_return_t
processor_exit(
	processor_t     processor)
{
	return processor_exit_reason(processor, REASON_SYSTEM, 0);
}

kern_return_t
processor_exit_from_user(
	processor_t     processor)
{
	return processor_exit_reason(processor, REASON_USER, 0);
}

kern_return_t
processor_start_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
{
	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
		return KERN_FAILURE;
	}

	if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
		return sched_processor_enable(processor, TRUE);
	} else {
		return processor_startup(processor, reason, flags);
	}
}

kern_return_t
processor_start(
	processor_t                     processor)
{
	return processor_start_reason(processor, REASON_SYSTEM, 0);
}

kern_return_t
processor_start_from_user(
	processor_t                     processor)
{
	return processor_start_reason(processor, REASON_USER, 0);
}

kern_return_t
enable_smt_processors(bool enable)
{
	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
		/* Not an SMT system */
		return KERN_INVALID_ARGUMENT;
	}

	int ncpus = machine_info.logical_cpu_max;

	for (int i = 1; i < ncpus; i++) {
		processor_t processor = processor_array[i];

		if (processor->processor_primary != processor) {
			if (enable) {
				processor_start_from_user(processor);
			} else { /* Disable */
				processor_exit_from_user(processor);
			}
		}
	}

#define BSD_HOST 1
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	if (kret != KERN_SUCCESS) {
		return kret;
	}

	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
		return KERN_FAILURE;
	}

	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}

bool
processor_should_kprintf(processor_t processor, bool starting)
{
	processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;

	return reason != REASON_CLPC_SYSTEM;
}

kern_return_t
processor_control(
	processor_t             processor,
	processor_info_t        info,
	mach_msg_type_number_t  count)
{
	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	return cpu_control(processor->cpu_id, info, count);
}

kern_return_t
processor_get_assignment(
	processor_t     processor,
	processor_set_t *pset)
{
	int state;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	state = processor->state;
	if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
		return KERN_FAILURE;
	}

	*pset = &pset0;

	return KERN_SUCCESS;
}

kern_return_t
processor_set_info(
	processor_set_t         pset,
	int                     flavor,
	host_t                  *host,
	processor_set_info_t    info,
	mach_msg_type_number_t  *count)
{
	if (pset == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flavor == PROCESSOR_SET_BASIC_INFO) {
		processor_set_basic_info_t      basic_info;

		if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_set_basic_info_t) info;
#if defined(__x86_64__)
		basic_info->processor_count = processor_avail_count_user;
#else
		basic_info->processor_count = processor_avail_count;
#endif
		basic_info->default_policy = POLICY_TIMESHARE;

		*count = PROCESSOR_SET_BASIC_INFO_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
		policy_timeshare_base_t ts_base;

		if (*count < POLICY_TIMESHARE_BASE_COUNT) {
			return KERN_FAILURE;
		}

		ts_base = (policy_timeshare_base_t) info;
		ts_base->base_priority = BASEPRI_DEFAULT;

		*count = POLICY_TIMESHARE_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
		policy_fifo_base_t              fifo_base;

		if (*count < POLICY_FIFO_BASE_COUNT) {
			return KERN_FAILURE;
		}

		fifo_base = (policy_fifo_base_t) info;
		fifo_base->base_priority = BASEPRI_DEFAULT;

		*count = POLICY_FIFO_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
		policy_rr_base_t                rr_base;

		if (*count < POLICY_RR_BASE_COUNT) {
			return KERN_FAILURE;
		}

		rr_base = (policy_rr_base_t) info;
		rr_base->base_priority = BASEPRI_DEFAULT;
		rr_base->quantum = 1;

		*count = POLICY_RR_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
		policy_timeshare_limit_t        ts_limit;

		if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		ts_limit = (policy_timeshare_limit_t) info;
		ts_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_TIMESHARE_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
		policy_fifo_limit_t             fifo_limit;

		if (*count < POLICY_FIFO_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		fifo_limit = (policy_fifo_limit_t) info;
		fifo_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_FIFO_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_RR_LIMITS) {
		policy_rr_limit_t               rr_limit;

		if (*count < POLICY_RR_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		rr_limit = (policy_rr_limit_t) info;
		rr_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_RR_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
		int                             *enabled;

		if (*count < (sizeof(*enabled) / sizeof(int))) {
			return KERN_FAILURE;
		}

		enabled = (int *) info;
		*enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;

		*count = sizeof(*enabled) / sizeof(int);
		*host = &realhost;
		return KERN_SUCCESS;
	}

	*host = HOST_NULL;
	return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_statistics
 *
 *	Returns scheduling statistics for a processor set.
 */
kern_return_t
processor_set_statistics(
	processor_set_t         pset,
	int                     flavor,
	processor_set_info_t    info,
	mach_msg_type_number_t  *count)
{
	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_PROCESSOR_SET;
	}

	if (flavor == PROCESSOR_SET_LOAD_INFO) {
		processor_set_load_info_t     load_info;

		if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		load_info = (processor_set_load_info_t) info;

		load_info->mach_factor = sched_mach_factor;
		load_info->load_average = sched_load_average;

		load_info->task_count = tasks_count;
		load_info->thread_count = threads_count;

		*count = PROCESSOR_SET_LOAD_INFO_COUNT;
		return KERN_SUCCESS;
	}

	return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_things:
 *
 *	Common internals for processor_set_{threads,tasks}
 */
static kern_return_t
processor_set_things(
	processor_set_t pset,
	void **thing_list,
	mach_msg_type_number_t *countp,
	int type,
	mach_task_flavor_t flavor)
{
	unsigned int i;
	task_t task;
	thread_t thread;

	task_t *task_list;
	vm_size_t actual_tasks, task_count_cur, task_count_needed;

	thread_t *thread_list;
	vm_size_t actual_threads, thread_count_cur, thread_count_needed;

	void *addr, *newaddr;
	vm_size_t count, count_needed;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_ARGUMENT;
	}

	task_count_cur = 0;
	task_count_needed = 0;
	task_list = NULL;
	actual_tasks = 0;

	thread_count_cur = 0;
	thread_count_needed = 0;
	thread_list = NULL;
	actual_threads = 0;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		if (type == PSET_THING_THREAD) {
			thread_count_needed = threads_count;
		}
#if !CONFIG_MACF
		else
#endif
		task_count_needed = tasks_count;

		if (task_count_needed <= task_count_cur &&
		    thread_count_needed <= thread_count_cur) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (task_count_needed > task_count_cur) {
			kfree_type(task_t, task_count_cur, task_list);
			assert(task_count_needed > 0);
			task_count_cur = task_count_needed;

			task_list = kalloc_type(task_t, task_count_cur, Z_WAITOK | Z_ZERO);
			if (task_list == NULL) {
				kfree_type(thread_t, thread_count_cur, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}
		}

		/* grow thread array */
		if (thread_count_needed > thread_count_cur) {
			kfree_type(thread_t, thread_count_cur, thread_list);

			assert(thread_count_needed > 0);
			thread_count_cur = thread_count_needed;

			thread_list = kalloc_type(thread_t, thread_count_cur, Z_WAITOK | Z_ZERO);
			if (thread_list == NULL) {
				kfree_type(task_t, task_count_cur, task_list);
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	/* OK, have memory and the list locked */

	/* If we need it, get the thread list */
	if (type == PSET_THING_THREAD) {
		queue_iterate(&threads, thread, thread_t, threads) {
			task = get_threadtask(thread);
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip threads belonging to kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			thread_reference(thread);
			thread_list[actual_threads++] = thread;
		}
	}
#if !CONFIG_MACF
	else
#endif
	{
		/* get a list of the tasks */
		queue_iterate(&tasks, task, task_t, tasks) {
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			task_reference(task);
			task_list[actual_tasks++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

#if CONFIG_MACF
	unsigned int j, used;

	/* for each task, make sure we are allowed to examine it */
	for (i = used = 0; i < actual_tasks; i++) {
		if (mac_task_check_expose_task(task_list[i], flavor)) {
			task_deallocate(task_list[i]);
			continue;
		}
		task_list[used++] = task_list[i];
	}
	actual_tasks = used;
	task_count_needed = actual_tasks;

	if (type == PSET_THING_THREAD) {
		/* for each thread (if any), make sure its task is in the allowed list */
		for (i = used = 0; i < actual_threads; i++) {
			boolean_t found_task = FALSE;

			task = get_threadtask(thread_list[i]);
			for (j = 0; j < actual_tasks; j++) {
				if (task_list[j] == task) {
					found_task = TRUE;
					break;
				}
			}
			if (found_task) {
				thread_list[used++] = thread_list[i];
			} else {
				thread_deallocate(thread_list[i]);
			}
		}
		actual_threads = used;
		thread_count_needed = actual_threads;

		/* done with the task list */
		for (i = 0; i < actual_tasks; i++) {
			task_deallocate(task_list[i]);
		}
		kfree_type(task_t, task_count_cur, task_list);
		task_count_cur = 0;
		actual_tasks = 0;
		task_list = NULL;
	}
#endif

	if (type == PSET_THING_THREAD) {
		if (actual_threads == 0) {
			/* no threads available to return */
			assert(task_count_cur == 0);
			kfree_type(thread_t, thread_count_cur, thread_list);
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_threads;
		count = thread_count_cur;
		addr = thread_list;
	} else {
		if (actual_tasks == 0) {
			/* no tasks available to return */
			assert(thread_count_cur == 0);
			kfree_type(task_t, task_count_cur, task_list);
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_tasks;
		count = task_count_cur;
		addr = task_list;
	}

	/* if we allocated too much, must copy */
	if (count_needed < count) {
		newaddr = kalloc_type(void *, count_needed, Z_WAITOK | Z_ZERO);
		if (newaddr == 0) {
			for (i = 0; i < actual_tasks; i++) {
				if (type == PSET_THING_THREAD) {
					thread_deallocate(thread_list[i]);
				} else {
					task_deallocate(task_list[i]);
				}
			}
			kfree_type(void *, count, addr);
			return KERN_RESOURCE_SHORTAGE;
		}

		bcopy(addr, newaddr, count_needed * sizeof(void *));
		kfree_type(void *, count, addr);

		addr = newaddr;
		count = count_needed;
	}

	*thing_list = (void **)addr;
	*countp = (mach_msg_type_number_t)count;

	return KERN_SUCCESS;
}
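
/*
 * The sizing loop at the top of processor_set_things() cannot allocate while
 * holding tasks_threads_lock, so it snapshots the counts, drops the lock to
 * (re)allocate, and retries until the buffers are large enough; the lists may
 * grow between passes. The shape of that pattern, reduced to one list, with
 * hypothetical names that are not part of xnu:
 */
#if 0   /* Illustrative sketch only; never built. */
static kern_return_t
sketch_snapshot_list(item_t **out_buf, vm_size_t *out_count)
{
	item_t *buf = NULL;
	vm_size_t allocated = 0, needed;

	for (;;) {
		lck_mtx_lock(&list_lock);
		needed = list_count;            /* may have grown since last pass */
		if (needed <= allocated) {
			break;                  /* big enough: keep the lock */
		}
		lck_mtx_unlock(&list_lock);     /* never allocate under the lock */

		kfree_type(item_t, allocated, buf);
		allocated = needed;
		buf = kalloc_type(item_t, allocated, Z_WAITOK | Z_ZERO);
		if (buf == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}
	/* ... copy items out under the lock ... */
	lck_mtx_unlock(&list_lock);
	*out_buf = buf;
	*out_count = needed;
	return KERN_SUCCESS;
}
#endif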

/*
 *	processor_set_tasks:
 *
 *	List all tasks in the processor set.
 */
static kern_return_t
processor_set_tasks_internal(
	processor_set_t         pset,
	task_array_t            *task_list,
	mach_msg_type_number_t  *count,
	mach_task_flavor_t      flavor)
{
	kern_return_t ret;
	mach_msg_type_number_t i;

	ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/* do the conversion that Mig should handle */
	switch (flavor) {
	case TASK_FLAVOR_CONTROL:
		for (i = 0; i < *count; i++) {
			if ((*task_list)[i] == current_task()) {
				/* if current_task(), return pinned port */
				(*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
			} else {
				(*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
			}
		}
		break;
	case TASK_FLAVOR_READ:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
		}
		break;
	case TASK_FLAVOR_INSPECT:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
		}
		break;
	case TASK_FLAVOR_NAME:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
		}
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}

kern_return_t
processor_set_tasks(
	processor_set_t         pset,
	task_array_t            *task_list,
	mach_msg_type_number_t  *count)
{
	return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
}

/*
 *	processor_set_tasks_with_flavor:
 *
 *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
 */
kern_return_t
processor_set_tasks_with_flavor(
	processor_set_t         pset,
	mach_task_flavor_t      flavor,
	task_array_t            *task_list,
	mach_msg_type_number_t  *count)
{
	switch (flavor) {
	case TASK_FLAVOR_CONTROL:
	case TASK_FLAVOR_READ:
	case TASK_FLAVOR_INSPECT:
	case TASK_FLAVOR_NAME:
		return processor_set_tasks_internal(pset, task_list, count, flavor);
	default:
		return KERN_INVALID_ARGUMENT;
	}
}

/*
 *	processor_set_threads:
 *
 *	List all threads in the processor set.
 */
#if defined(SECURE_KERNEL)
kern_return_t
processor_set_threads(
	__unused processor_set_t                pset,
	__unused thread_array_t         *thread_list,
	__unused mach_msg_type_number_t *count)
{
	return KERN_FAILURE;
}
#elif !defined(XNU_TARGET_OS_OSX)
kern_return_t
processor_set_threads(
	__unused processor_set_t                pset,
	__unused thread_array_t         *thread_list,
	__unused mach_msg_type_number_t *count)
{
	return KERN_NOT_SUPPORTED;
}
#else
kern_return_t
processor_set_threads(
	processor_set_t         pset,
	thread_array_t          *thread_list,
	mach_msg_type_number_t  *count)
{
	kern_return_t ret;
	mach_msg_type_number_t i;

	ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/* do the conversion that Mig should handle */
	for (i = 0; i < *count; i++) {
		(*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
	}
	return KERN_SUCCESS;
}
#endif

pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else
	(void)thread;
	return PSET_SMP;
#endif
}

#if CONFIG_THREAD_GROUPS && __AMP__

void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	switch (perfctl_class) {
	case PERFCONTROL_CLASS_UTILITY:
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
		break;
	case PERFCONTROL_CLASS_BACKGROUND:
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
		break;
	default:
		panic("perfctl_class invalid");
		break;
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */
1743