xref: /xnu-8796.121.2/osfmk/kern/processor.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a) !
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 /*
60  *	processor.c: processor and processor_set manipulation routines.
61  */
62 
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84 
85 #include <security/mac_mach_internal.h>
86 
87 #if defined(CONFIG_XNUPOST)
88 
89 #include <tests/xnupost.h>
90 
91 #endif /* CONFIG_XNUPOST */
92 
93 /*
94  * Exported interface
95  */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98 #include <san/kcov.h>
99 
100 /*
101  * The first pset and the pset_node are created by default for all platforms.
102  * Those typically represent the boot-cluster. For AMP platforms, all clusters
103  * of the same type are part of the same pset_node. This allows for easier
104  * CPU selection logic.
105  */
106 struct processor_set    pset0;
107 struct pset_node        pset_node0;
108 
109 #if __AMP__
110 struct pset_node        pset_node1;
111 pset_node_t             ecore_node;
112 pset_node_t             pcore_node;
113 #endif
114 
115 LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);
116 
117 LCK_GRP_DECLARE(pset_lck_grp, "pset");
118 
119 queue_head_t            tasks;
120 queue_head_t            terminated_tasks;       /* To be used ONLY for stackshot. */
121 queue_head_t            corpse_tasks;
122 int                     tasks_count;
123 int                     terminated_tasks_count;
124 queue_head_t            threads;
125 queue_head_t            terminated_threads;
126 int                     threads_count;
127 int                     terminated_threads_count;
128 LCK_GRP_DECLARE(task_lck_grp, "task");
129 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
130 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
131 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
132 
133 processor_t             processor_list;
134 unsigned int            processor_count;
135 static processor_t      processor_list_tail;
136 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
137 
138 uint32_t                processor_avail_count;
139 uint32_t                processor_avail_count_user;
140 uint32_t                primary_processor_avail_count;
141 uint32_t                primary_processor_avail_count_user;
142 
143 SECURITY_READ_ONLY_LATE(int)    master_cpu = 0;
144 
145 struct processor        PERCPU_DATA(processor);
146 processor_t             processor_array[MAX_SCHED_CPUS] = { 0 };
147 processor_set_t         pset_array[MAX_PSETS] = { 0 };
148 
149 static timer_call_func_t running_timer_funcs[] = {
150 	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
151 	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
152 };
153 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
154     == RUNNING_TIMER_MAX, "missing running timer function");
155 
156 #if defined(CONFIG_XNUPOST)
157 kern_return_t ipi_test(void);
158 extern void arm64_ipi_test(void);
159 
160 kern_return_t
ipi_test()161 ipi_test()
162 {
163 #if __arm64__
164 	processor_t p;
165 
166 	for (p = processor_list; p != NULL; p = p->processor_list) {
167 		thread_bind(p);
168 		thread_block(THREAD_CONTINUE_NULL);
169 		kprintf("Running IPI test on cpu %d\n", p->cpu_id);
170 		arm64_ipi_test();
171 	}
172 
173 	/* unbind thread from specific cpu */
174 	thread_bind(PROCESSOR_NULL);
175 	thread_block(THREAD_CONTINUE_NULL);
176 
177 	T_PASS("Done running IPI tests");
178 #else
179 	T_PASS("Unsupported platform. Not running IPI tests");
180 
181 #endif /* __arm64__ */
182 
183 	return KERN_SUCCESS;
184 }
185 #endif /* defined(CONFIG_XNUPOST) */
186 
/* When cleared, secondary SMT processors are kept out of service:
 * processor_startup() either leaves them offline (if the CPU can exit)
 * or starts them but disables them at the scheduler level. Default on. */
int sched_enable_smt = 1;
188 
/*
 *	Early boot: set up the default pset/pset_node topology, the global
 *	task/thread queues, and initialize the master processor.
 */
void
processor_bootstrap(void)
{
	/* Initialize PSET node and PSET associated with boot cluster */
	pset_node0.psets = &pset0;
	pset_node0.pset_cluster_type = PSET_SMP;

#if __AMP__
	const ml_topology_info_t *topology_info = ml_get_topology_info();

	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset0.pset_id = boot_cluster->cluster_id;
	pset0.pset_cluster_id = boot_cluster->cluster_id;
	if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
		/* Boot cluster is efficiency: node0 hosts E-clusters, node1 P-clusters. */
		pset0.pset_cluster_type      = PSET_AMP_E;
		pset_node0.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node1;
	} else {
		/* Boot cluster is performance: the node roles are reversed. */
		pset0.pset_cluster_type      = PSET_AMP_P;
		pset_node0.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node1;
	}

	/* Link pset_node1 to pset_node0 */
	pset_node0.node_list = &pset_node1;
#endif

	pset_init(&pset0, &pset_node0);
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);

	processor_init(master_processor, master_cpu, &pset0);
}
235 
236 /*
237  *	Initialize the given processor for the cpu
238  *	indicated by cpu_id, and assign to the
239  *	specified processor set.
240  */
241 void
processor_init(processor_t processor,int cpu_id,processor_set_t pset)242 processor_init(
243 	processor_t            processor,
244 	int                    cpu_id,
245 	processor_set_t        pset)
246 {
247 	spl_t           s;
248 
249 	assert(cpu_id < MAX_SCHED_CPUS);
250 	processor->cpu_id = cpu_id;
251 
252 	if (processor != master_processor) {
253 		/* Scheduler state for master_processor initialized in sched_init() */
254 		SCHED(processor_init)(processor);
255 	}
256 
257 	processor->state = PROCESSOR_OFF_LINE;
258 	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
259 	processor->processor_set = pset;
260 	processor_state_update_idle(processor);
261 	processor->starting_pri = MINPRI;
262 	processor->quantum_end = UINT64_MAX;
263 	processor->deadline = UINT64_MAX;
264 	processor->first_timeslice = FALSE;
265 	processor->processor_offlined = false;
266 	processor->processor_primary = processor; /* no SMT relationship known at this point */
267 	processor->processor_secondary = NULL;
268 	processor->is_SMT = false;
269 	processor->is_recommended = true;
270 	processor->processor_self = IP_NULL;
271 	processor->processor_list = NULL;
272 	processor->must_idle = false;
273 	processor->last_startup_reason = REASON_SYSTEM;
274 	processor->last_shutdown_reason = REASON_NONE;
275 	processor->shutdown_temporary = false;
276 	processor->shutdown_locked = false;
277 	processor->last_recommend_reason = REASON_SYSTEM;
278 	processor->last_derecommend_reason = REASON_NONE;
279 	processor->running_timers_active = false;
280 	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
281 		timer_call_setup(&processor->running_timers[i],
282 		    running_timer_funcs[i], processor);
283 		running_timer_clear(processor, i);
284 	}
285 	recount_processor_init(processor);
286 	simple_lock_init(&processor->start_state_lock, 0);
287 
288 	s = splsched();
289 	pset_lock(pset);
290 	bit_set(pset->cpu_bitmask, cpu_id);
291 	bit_set(pset->recommended_bitmask, cpu_id);
292 	bit_set(pset->primary_map, cpu_id);
293 	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
294 	if (pset->cpu_set_count++ == 0) {
295 		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
296 	} else {
297 		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
298 		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
299 	}
300 	pset_unlock(pset);
301 	splx(s);
302 
303 	simple_lock(&processor_list_lock, LCK_GRP_NULL);
304 	if (processor_list == NULL) {
305 		processor_list = processor;
306 	} else {
307 		processor_list_tail->processor_list = processor;
308 	}
309 	processor_list_tail = processor;
310 	processor_count++;
311 	simple_unlock(&processor_list_lock);
312 	processor_array[cpu_id] = processor;
313 }
314 
/* Latched true the first time any SMT sibling pair is registered. */
bool system_is_SMT = false;

/*
 *	Record the SMT relationship between a (possibly secondary)
 *	processor and its primary.  When a genuine secondary is
 *	registered, both CPUs are marked as SMT, the system-wide SMT
 *	flag is latched, and the secondary is removed from its pset's
 *	primary CPU map.
 */
void
processor_set_primary(
	processor_t             processor,
	processor_t             primary)
{
	assert(processor->processor_primary == primary || processor->processor_primary == processor);
	/* Re-adjust primary point for this (possibly) secondary processor */
	processor->processor_primary = primary;

	assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
	if (primary != processor) {
		/* Link primary to secondary, assumes a 2-way SMT model
		 * We'll need to move to a queue if any future architecture
		 * requires otherwise.
		 */
		assert(processor->processor_secondary == NULL);
		primary->processor_secondary = processor;
		/* Mark both processors as SMT siblings */
		primary->is_SMT = TRUE;
		processor->is_SMT = TRUE;

		if (!system_is_SMT) {
			/* First SMT pair: switch the RT backup-processor count to the SMT default. */
			system_is_SMT = true;
			sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
		}

		processor_set_t pset = processor->processor_set;
		spl_t s = splsched();
		pset_lock(pset);
		if (!pset->is_SMT) {
			pset->is_SMT = true;
		}
		/* Secondaries are excluded from the pset's primary CPU map. */
		bit_clear(pset->primary_map, processor->cpu_id);
		pset_unlock(pset);
		splx(s);
	}
}
354 
355 processor_set_t
processor_pset(processor_t processor)356 processor_pset(
357 	processor_t     processor)
358 {
359 	return processor->processor_set;
360 }
361 
362 #if CONFIG_SCHED_EDGE
363 
364 cluster_type_t
pset_type_for_id(uint32_t cluster_id)365 pset_type_for_id(uint32_t cluster_id)
366 {
367 	return pset_array[cluster_id]->pset_type;
368 }
369 
370 /*
371  * Processor foreign threads
372  *
373  * With the Edge scheduler, each pset maintains a bitmap of processors running threads
374  * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
375  * if its of a different type than its preferred cluster type (E/P). The bitmap should
376  * be updated every time a new thread is assigned to run on a processor. Cluster shared
377  * resource intensive threads are also not counted as foreign threads since these
378  * threads should not be rebalanced when running on non-preferred clusters.
379  *
380  * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
381  * for rebalancing.
382  */
383 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)384 processor_state_update_running_foreign(processor_t processor, thread_t thread)
385 {
386 	cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
387 	cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
388 
389 	boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
390 	boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
391 	if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
392 		bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
393 	} else {
394 		bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
395 	}
396 }
397 
398 /*
399  * Cluster shared resource intensive threads
400  *
401  * With the Edge scheduler, each pset maintains a bitmap of processors running
402  * threads that are shared resource intensive. This per-thread property is set
403  * by the performance controller or explicitly via dispatch SPIs. The bitmap
404  * allows the Edge scheduler to calculate the cluster shared resource load on
405  * any given cluster and load balance intensive threads accordingly.
406  */
407 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)408 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
409 {
410 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
411 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
412 	} else {
413 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
414 	}
415 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
416 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
417 	} else {
418 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
419 	}
420 }
421 
422 #endif /* CONFIG_SCHED_EDGE */
423 
/*
 *	Reset the processor's "currently running" snapshot to idle defaults.
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
	processor->current_is_NO_SMT = false;
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* Idle CPUs are not tracked by the Edge scheduler: park the running
	 * bucket at TH_BUCKET_SCHED_MAX and clear this CPU's shared-resource bits. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
	sched_update_pset_load_average(processor->processor_set, 0);
}
445 
/*
 *	Refresh the processor's "currently running" snapshot from the given
 *	thread.  pset_lock_held indicates whether the caller already holds
 *	this processor's pset lock, which is required to update the pset
 *	load average.
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
	processor->current_is_NO_SMT = thread_no_smt(thread);
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		sched_update_pset_load_average(processor->processor_set, 0);
	}
}
473 
/*
 *	Set the processor's "currently running" snapshot from explicit
 *	values instead of deriving them from a thread.
 */
void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
{
	processor->current_pri = pri;
	processor->current_sfi_class = sfi_class;
	processor->current_recommended_pset_type = pset_type;
	processor->current_perfctl_class = perfctl_class;
	processor->current_urgency = urgency;
#if CONFIG_SCHED_EDGE
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	/* No thread is supplied here, so clear this CPU's shared-resource bits. */
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
}
489 
490 pset_node_t
pset_node_root(void)491 pset_node_root(void)
492 {
493 	return &pset_node0;
494 }
495 
/* NOTE(review): appears intended to serialize pset creation; not taken
 * within this file's pset_create() itself — presumably held by callers. */
LCK_GRP_DECLARE(pset_create_grp, "pset_create");
LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);
498 
/*
 *	Allocate and initialize a new processor set attached to the given
 *	pset node.  Schedulers without multi-pset support get the boot
 *	pset instead.  The new pset is appended to the tail of the node's
 *	pset list under pset_node_lock.
 */
processor_set_t
pset_create(
	pset_node_t node,
	pset_cluster_type_t pset_type,
	uint32_t pset_cluster_id,
	int      pset_id)
{
	/* some schedulers do not support multiple psets */
	if (SCHED(multiple_psets_enabled) == FALSE) {
		return processor_pset(master_processor);
	}

	/* Permanent allocation: psets are never freed. */
	processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);

	if (pset != PROCESSOR_SET_NULL) {
		pset->pset_cluster_type = pset_type;
		pset->pset_cluster_id = pset_cluster_id;
		pset->pset_id = pset_id;
		pset_init(pset, node);

		lck_spin_lock(&pset_node_lock);

		/* Walk to the tail of the node's pset list and link in. */
		prev = &node->psets;
		while (*prev != PROCESSOR_SET_NULL) {
			prev = &(*prev)->pset_list;
		}

		*prev = pset;

		lck_spin_unlock(&pset_node_lock);
	}

	return pset;
}
533 
534 /*
535  *	Find processor set with specified cluster_id.
536  *	Returns default_pset if not found.
537  */
538 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)539 pset_find(
540 	uint32_t cluster_id,
541 	processor_set_t default_pset)
542 {
543 	lck_spin_lock(&pset_node_lock);
544 	pset_node_t node = &pset_node0;
545 	processor_set_t pset = NULL;
546 
547 	do {
548 		pset = node->psets;
549 		while (pset != NULL) {
550 			if (pset->pset_cluster_id == cluster_id) {
551 				break;
552 			}
553 			pset = pset->pset_list;
554 		}
555 	} while (pset == NULL && (node = node->node_list) != NULL);
556 	lck_spin_unlock(&pset_node_lock);
557 	if (pset == NULL) {
558 		return default_pset;
559 	}
560 	return pset;
561 }
562 
563 /*
564  *	Initialize the given processor_set structure.
565  */
566 void
pset_init(processor_set_t pset,pset_node_t node)567 pset_init(
568 	processor_set_t         pset,
569 	pset_node_t                     node)
570 {
571 	pset->online_processor_count = 0;
572 	pset->load_average = 0;
573 	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
574 	pset->cpu_set_low = pset->cpu_set_hi = 0;
575 	pset->cpu_set_count = 0;
576 	pset->last_chosen = -1;
577 	pset->cpu_bitmask = 0;
578 	pset->recommended_bitmask = 0;
579 	pset->primary_map = 0;
580 	pset->realtime_map = 0;
581 	pset->cpu_available_map = 0;
582 
583 	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
584 		pset->cpu_state_map[i] = 0;
585 	}
586 	pset->pending_AST_URGENT_cpu_mask = 0;
587 	pset->pending_AST_PREEMPT_cpu_mask = 0;
588 #if defined(CONFIG_SCHED_DEFERRED_AST)
589 	pset->pending_deferred_AST_cpu_mask = 0;
590 #endif
591 	pset->pending_spill_cpu_mask = 0;
592 	pset->rt_pending_spill_cpu_mask = 0;
593 	pset_lock_init(pset);
594 	pset->pset_self = IP_NULL;
595 	pset->pset_name_self = IP_NULL;
596 	pset->pset_list = PROCESSOR_SET_NULL;
597 	pset->is_SMT = false;
598 #if CONFIG_SCHED_EDGE
599 	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
600 	pset->cpu_running_foreign = 0;
601 	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
602 		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
603 		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
604 	}
605 #endif /* CONFIG_SCHED_EDGE */
606 	pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;
607 
608 	if (pset != &pset0) {
609 		/*
610 		 * Scheduler runqueue initialization for non-boot psets.
611 		 * This initialization for pset0 happens in sched_init().
612 		 */
613 		SCHED(pset_init)(pset);
614 		SCHED(rt_init)(pset);
615 	}
616 
617 	/*
618 	 * Because the pset_node_lock is not taken by every client of the pset_map,
619 	 * we need to make sure that the initialized pset contents are visible to any
620 	 * client that loads a non-NULL value from pset_array.
621 	 */
622 	os_atomic_store(&pset_array[pset->pset_id], pset, release);
623 
624 	lck_spin_lock(&pset_node_lock);
625 	bit_set(node->pset_map, pset->pset_id);
626 	pset->node = node;
627 	lck_spin_unlock(&pset_node_lock);
628 }
629 
630 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)631 processor_info_count(
632 	processor_flavor_t              flavor,
633 	mach_msg_type_number_t  *count)
634 {
635 	switch (flavor) {
636 	case PROCESSOR_BASIC_INFO:
637 		*count = PROCESSOR_BASIC_INFO_COUNT;
638 		break;
639 
640 	case PROCESSOR_CPU_LOAD_INFO:
641 		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
642 		break;
643 
644 	default:
645 		return cpu_info_count(flavor, count);
646 	}
647 
648 	return KERN_SUCCESS;
649 }
650 
651 void
processor_cpu_load_info(processor_t processor,natural_t ticks[static CPU_STATE_MAX])652 processor_cpu_load_info(processor_t processor,
653     natural_t ticks[static CPU_STATE_MAX])
654 {
655 	struct recount_usage usage = { 0 };
656 	uint64_t idle_time = 0;
657 	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);
658 
659 	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_user_time_mach /
660 	    hz_tick_interval);
661 	ticks[CPU_STATE_SYSTEM] += (uint32_t)(usage.ru_system_time_mach /
662 	    hz_tick_interval);
663 	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
664 }
665 
/*
 *	Mach interface: return information about a processor.
 *
 *	PROCESSOR_BASIC_INFO and PROCESSOR_CPU_LOAD_INFO are handled here;
 *	all other flavors are delegated to the machine-dependent cpu_info().
 *	On success *host is set to the real host and *count to the number of
 *	integers written into info.  KERN_FAILURE if the caller's buffer
 *	(*count on entry) is too small.
 */
kern_return_t
processor_info(
	processor_t     processor,
	processor_flavor_t              flavor,
	host_t                                  *host,
	processor_info_t                info,
	mach_msg_type_number_t  *count)
{
	int     cpu_id, state;
	kern_return_t   result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t          basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/*
		 * A processor counts as running unless it is offline (or heading
		 * offline) for a non-temporary reason; x86 additionally reports
		 * derecommended CPUs as not running.
		 */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t       cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* processor_cpu_load_info() accumulates, so zero the slots first; NICE is always 0. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
749 
/*
 *	Block the calling thread until the given processor leaves the
 *	PROCESSOR_START state.  Panics if the CPU fails to come up within
 *	the wait timeout.
 */
void
processor_wait_for_start(processor_t processor)
{
	spl_t s = splsched();
	simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	while (processor->state == PROCESSOR_START) {
		assert_wait_timeout((event_t)&processor->state, THREAD_UNINT, 1000, 1000 * NSEC_PER_USEC); /* 1 second */
		/* Drop the lock and spl before actually blocking. */
		simple_unlock(&processor->start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("%s>cpu %d failed to start\n", __FUNCTION__, processor->cpu_id);
		}

		/* Re-acquire and re-check: the loop guards against spurious wakeups. */
		s = splsched();
		simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	}
	simple_unlock(&processor->start_state_lock);
	splx(s);
}
771 
/* Serializes processor start/shutdown operations file-wide. */
LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);

/*
 *	Bring a processor online.
 *
 *	reason records why the start was requested.  flags may carry
 *	LOCK_STATE/UNLOCK_STATE (valid only with REASON_SYSTEM),
 *	SHUTDOWN_TEMPORARY, and WAIT_FOR_START.  The master processor takes
 *	a short path: power up and cpu_start() while bound to it.  Other
 *	processors get their idle/startup threads created on first boot,
 *	transition OFF_LINE -> START under the pset lock, and are kicked
 *	via cpu_start().  Everything is serialized by processor_updown_lock.
 */
static kern_return_t
processor_startup(
	processor_t                     processor,
	processor_reason_t              reason,
	uint32_t                        flags)
{
	processor_set_t         pset;
	thread_t                        thread;
	kern_return_t           result;
	spl_t                           s;

	if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Only the system itself may lock or unlock the shutdown state. */
	if ((flags & (LOCK_STATE | UNLOCK_STATE)) && (reason != REASON_SYSTEM)) {
		return KERN_INVALID_ARGUMENT;
	}

	lck_mtx_lock(&processor_updown_lock);

	if (processor == master_processor) {
		processor_t             prev;

		processor->last_startup_reason = reason;

		ml_cpu_power_enable(processor->cpu_id);

		/* Run on the master processor itself while starting it. */
		prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		result = cpu_start(processor->cpu_id);

		thread_bind(prev);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}

	bool scheduler_disable = false;

	if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
		if (cpu_can_exit(processor->cpu_id)) {
			/* SMT disabled and the CPU can stay off: leave it down. */
			lck_mtx_unlock(&processor_updown_lock);
			return KERN_SUCCESS;
		}
		/*
		 * This secondary SMT processor must start in order to service interrupts,
		 * so instead it will be disabled at the scheduler level.
		 */
		scheduler_disable = true;
	}

	s = splsched();
	pset = processor->processor_set;
	pset_lock(pset);
	if (flags & LOCK_STATE) {
		processor->shutdown_locked = true;
	} else if (flags & UNLOCK_STATE) {
		processor->shutdown_locked = false;
	}

	if (processor->state == PROCESSOR_START) {
		/* Another caller already started it; just wait for it to come up. */
		pset_unlock(pset);
		splx(s);

		processor_wait_for_start(processor);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_SUCCESS;
	}

	/* Refuse to start a CPU that is not offline, or a temporary start of
	 * a CPU that was not temporarily shut down. */
	if ((processor->state != PROCESSOR_OFF_LINE) || ((flags & SHUTDOWN_TEMPORARY) && !processor->shutdown_temporary)) {
		pset_unlock(pset);
		splx(s);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_FAILURE;
	}

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	processor->last_startup_reason = reason;
	pset_unlock(pset);
	splx(s);

	/*
	 *	Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		result = idle_thread_create(processor);
		if (result != KERN_SUCCESS) {
			/* Roll the state machine back to OFF_LINE on failure. */
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}
	}

	/*
	 *	If there is no active thread, the processor
	 *	has never been started.  Create a dedicated
	 *	start up thread.
	 */
	if (processor->active_thread == THREAD_NULL &&
	    processor->startup_thread == THREAD_NULL) {
		result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
		if (result != KERN_SUCCESS) {
			/* Roll the state machine back to OFF_LINE on failure. */
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}

		/* Bind the startup thread to this processor and mark it runnable. */
		s = splsched();
		thread_lock(thread);
		thread->bound_processor = processor;
		processor->startup_thread = thread;
		thread->state = TH_RUN;
		thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
		thread_unlock(thread);
		splx(s);

		/* Drop the creation reference; the processor holds its own. */
		thread_deallocate(thread);
	}

	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
	result = cpu_start(processor->cpu_id);
#if defined (__arm__) || defined (__arm64__)
	/* On ARM, cpu_start() is expected to always succeed here. */
	assert(result == KERN_SUCCESS);
#else
	if (result != KERN_SUCCESS) {
		s = splsched();
		pset_lock(pset);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		pset_unlock(pset);
		splx(s);
		ml_cpu_end_state_transition(processor->cpu_id);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}
#endif
	if (scheduler_disable) {
		/* Secondary SMT CPU started only for interrupts: keep it out of scheduling. */
		assert(processor->processor_primary != processor);
		sched_processor_enable(processor, FALSE);
	}

	if (flags & WAIT_FOR_START) {
		processor_wait_for_start(processor);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

#if CONFIG_KCOV
	kcov_start_cpu(processor->cpu_id);
#endif

	lck_mtx_unlock(&processor_updown_lock);
	return KERN_SUCCESS;
}
950 
951 kern_return_t
processor_exit_reason(processor_t processor,processor_reason_t reason,uint32_t flags)952 processor_exit_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
953 {
954 	if (processor == PROCESSOR_NULL) {
955 		return KERN_INVALID_ARGUMENT;
956 	}
957 
958 	if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
959 #ifdef RHODES_CLUSTER_POWERDOWN_WORKAROUND
960 		/*
961 		 * Must allow CLPC to finish powering down the whole cluster,
962 		 * or IOCPUSleepKernel() will fail to restart the offline cpus.
963 		 */
964 		if (reason != REASON_CLPC_SYSTEM) {
965 			return KERN_FAILURE;
966 		}
967 #else
968 		return KERN_FAILURE;
969 #endif
970 	}
971 
972 	if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
973 		return sched_processor_enable(processor, FALSE);
974 	} else if ((reason == REASON_SYSTEM) || cpu_can_exit(processor->cpu_id)) {
975 		return processor_shutdown(processor, reason, flags);
976 	}
977 
978 	return KERN_INVALID_ARGUMENT;
979 }
980 
981 kern_return_t
processor_exit(processor_t processor)982 processor_exit(
983 	processor_t     processor)
984 {
985 	return processor_exit_reason(processor, REASON_SYSTEM, 0);
986 }
987 
988 kern_return_t
processor_exit_from_user(processor_t processor)989 processor_exit_from_user(
990 	processor_t     processor)
991 {
992 	return processor_exit_reason(processor, REASON_USER, 0);
993 }
994 
995 kern_return_t
processor_start_reason(processor_t processor,processor_reason_t reason,uint32_t flags)996 processor_start_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
997 {
998 	if (processor == PROCESSOR_NULL) {
999 		return KERN_INVALID_ARGUMENT;
1000 	}
1001 
1002 	if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
1003 		return KERN_FAILURE;
1004 	}
1005 
1006 	if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
1007 		return sched_processor_enable(processor, TRUE);
1008 	} else {
1009 		return processor_startup(processor, reason, flags);
1010 	}
1011 }
1012 
1013 kern_return_t
processor_start(processor_t processor)1014 processor_start(
1015 	processor_t                     processor)
1016 {
1017 	return processor_start_reason(processor, REASON_SYSTEM, 0);
1018 }
1019 
1020 kern_return_t
processor_start_from_user(processor_t processor)1021 processor_start_from_user(
1022 	processor_t                     processor)
1023 {
1024 	return processor_start_reason(processor, REASON_USER, 0);
1025 }
1026 
/*
 *	enable_smt_processors:
 *
 *	Bring every secondary SMT processor online (enable) or offline
 *	(disable), then verify the resulting logical cpu count via
 *	host_info().  Fails with KERN_INVALID_ARGUMENT on non-SMT systems.
 */
kern_return_t
enable_smt_processors(bool enable)
{
	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
		/* Not an SMT system */
		return KERN_INVALID_ARGUMENT;
	}

	int ncpus = machine_info.logical_cpu_max;

	/* starts at 1 — NOTE(review): presumably cpu 0 is always a primary; confirm */
	for (int i = 1; i < ncpus; i++) {
		processor_t processor = processor_array[i];

		/* only secondary (non-primary) processors are toggled */
		if (processor->processor_primary != processor) {
			if (enable) {
				processor_start_from_user(processor);
			} else { /* Disable */
				processor_exit_from_user(processor);
			}
		}
	}

#define BSD_HOST 1
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	if (kret != KERN_SUCCESS) {
		return kret;
	}

	/* after enabling, every logical cpu must be accounted for */
	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
		return KERN_FAILURE;
	}

	/* after disabling, only one logical cpu per physical core should remain */
	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
1067 
1068 bool
processor_should_kprintf(processor_t processor,bool starting)1069 processor_should_kprintf(processor_t processor, bool starting)
1070 {
1071 	processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1072 
1073 	return reason != REASON_CLPC_SYSTEM;
1074 }
1075 
1076 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1077 processor_control(
1078 	processor_t             processor,
1079 	processor_info_t        info,
1080 	mach_msg_type_number_t  count)
1081 {
1082 	if (processor == PROCESSOR_NULL) {
1083 		return KERN_INVALID_ARGUMENT;
1084 	}
1085 
1086 	return cpu_control(processor->cpu_id, info, count);
1087 }
1088 
1089 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1090 processor_get_assignment(
1091 	processor_t     processor,
1092 	processor_set_t *pset)
1093 {
1094 	int state;
1095 
1096 	if (processor == PROCESSOR_NULL) {
1097 		return KERN_INVALID_ARGUMENT;
1098 	}
1099 
1100 	state = processor->state;
1101 	if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1102 		return KERN_FAILURE;
1103 	}
1104 
1105 	*pset = &pset0;
1106 
1107 	return KERN_SUCCESS;
1108 }
1109 
1110 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1111 processor_set_info(
1112 	processor_set_t         pset,
1113 	int                     flavor,
1114 	host_t                  *host,
1115 	processor_set_info_t    info,
1116 	mach_msg_type_number_t  *count)
1117 {
1118 	if (pset == PROCESSOR_SET_NULL) {
1119 		return KERN_INVALID_ARGUMENT;
1120 	}
1121 
1122 	if (flavor == PROCESSOR_SET_BASIC_INFO) {
1123 		processor_set_basic_info_t      basic_info;
1124 
1125 		if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1126 			return KERN_FAILURE;
1127 		}
1128 
1129 		basic_info = (processor_set_basic_info_t) info;
1130 #if defined(__x86_64__)
1131 		basic_info->processor_count = processor_avail_count_user;
1132 #else
1133 		basic_info->processor_count = processor_avail_count;
1134 #endif
1135 		basic_info->default_policy = POLICY_TIMESHARE;
1136 
1137 		*count = PROCESSOR_SET_BASIC_INFO_COUNT;
1138 		*host = &realhost;
1139 		return KERN_SUCCESS;
1140 	} else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1141 		policy_timeshare_base_t ts_base;
1142 
1143 		if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1144 			return KERN_FAILURE;
1145 		}
1146 
1147 		ts_base = (policy_timeshare_base_t) info;
1148 		ts_base->base_priority = BASEPRI_DEFAULT;
1149 
1150 		*count = POLICY_TIMESHARE_BASE_COUNT;
1151 		*host = &realhost;
1152 		return KERN_SUCCESS;
1153 	} else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1154 		policy_fifo_base_t              fifo_base;
1155 
1156 		if (*count < POLICY_FIFO_BASE_COUNT) {
1157 			return KERN_FAILURE;
1158 		}
1159 
1160 		fifo_base = (policy_fifo_base_t) info;
1161 		fifo_base->base_priority = BASEPRI_DEFAULT;
1162 
1163 		*count = POLICY_FIFO_BASE_COUNT;
1164 		*host = &realhost;
1165 		return KERN_SUCCESS;
1166 	} else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1167 		policy_rr_base_t                rr_base;
1168 
1169 		if (*count < POLICY_RR_BASE_COUNT) {
1170 			return KERN_FAILURE;
1171 		}
1172 
1173 		rr_base = (policy_rr_base_t) info;
1174 		rr_base->base_priority = BASEPRI_DEFAULT;
1175 		rr_base->quantum = 1;
1176 
1177 		*count = POLICY_RR_BASE_COUNT;
1178 		*host = &realhost;
1179 		return KERN_SUCCESS;
1180 	} else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1181 		policy_timeshare_limit_t        ts_limit;
1182 
1183 		if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1184 			return KERN_FAILURE;
1185 		}
1186 
1187 		ts_limit = (policy_timeshare_limit_t) info;
1188 		ts_limit->max_priority = MAXPRI_KERNEL;
1189 
1190 		*count = POLICY_TIMESHARE_LIMIT_COUNT;
1191 		*host = &realhost;
1192 		return KERN_SUCCESS;
1193 	} else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1194 		policy_fifo_limit_t             fifo_limit;
1195 
1196 		if (*count < POLICY_FIFO_LIMIT_COUNT) {
1197 			return KERN_FAILURE;
1198 		}
1199 
1200 		fifo_limit = (policy_fifo_limit_t) info;
1201 		fifo_limit->max_priority = MAXPRI_KERNEL;
1202 
1203 		*count = POLICY_FIFO_LIMIT_COUNT;
1204 		*host = &realhost;
1205 		return KERN_SUCCESS;
1206 	} else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1207 		policy_rr_limit_t               rr_limit;
1208 
1209 		if (*count < POLICY_RR_LIMIT_COUNT) {
1210 			return KERN_FAILURE;
1211 		}
1212 
1213 		rr_limit = (policy_rr_limit_t) info;
1214 		rr_limit->max_priority = MAXPRI_KERNEL;
1215 
1216 		*count = POLICY_RR_LIMIT_COUNT;
1217 		*host = &realhost;
1218 		return KERN_SUCCESS;
1219 	} else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1220 		int                             *enabled;
1221 
1222 		if (*count < (sizeof(*enabled) / sizeof(int))) {
1223 			return KERN_FAILURE;
1224 		}
1225 
1226 		enabled = (int *) info;
1227 		*enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1228 
1229 		*count = sizeof(*enabled) / sizeof(int);
1230 		*host = &realhost;
1231 		return KERN_SUCCESS;
1232 	}
1233 
1234 
1235 	*host = HOST_NULL;
1236 	return KERN_INVALID_ARGUMENT;
1237 }
1238 
1239 /*
1240  *	processor_set_statistics
1241  *
1242  *	Returns scheduling statistics for a processor set.
1243  */
1244 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1245 processor_set_statistics(
1246 	processor_set_t         pset,
1247 	int                     flavor,
1248 	processor_set_info_t    info,
1249 	mach_msg_type_number_t  *count)
1250 {
1251 	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1252 		return KERN_INVALID_PROCESSOR_SET;
1253 	}
1254 
1255 	if (flavor == PROCESSOR_SET_LOAD_INFO) {
1256 		processor_set_load_info_t     load_info;
1257 
1258 		if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1259 			return KERN_FAILURE;
1260 		}
1261 
1262 		load_info = (processor_set_load_info_t) info;
1263 
1264 		load_info->mach_factor = sched_mach_factor;
1265 		load_info->load_average = sched_load_average;
1266 
1267 		load_info->task_count = tasks_count;
1268 		load_info->thread_count = threads_count;
1269 
1270 		*count = PROCESSOR_SET_LOAD_INFO_COUNT;
1271 		return KERN_SUCCESS;
1272 	}
1273 
1274 	return KERN_INVALID_ARGUMENT;
1275 }
1276 
1277 /*
1278  *	processor_set_things:
1279  *
1280  *	Common internals for processor_set_{threads,tasks}
1281  */
1282 static kern_return_t
processor_set_things(processor_set_t pset,void ** thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1283 processor_set_things(
1284 	processor_set_t pset,
1285 	void **thing_list,
1286 	mach_msg_type_number_t *countp,
1287 	int type,
1288 	mach_task_flavor_t flavor)
1289 {
1290 	unsigned int i;
1291 	task_t task;
1292 	thread_t thread;
1293 
1294 	task_t *task_list;
1295 	vm_size_t actual_tasks, task_count_cur, task_count_needed;
1296 
1297 	thread_t *thread_list;
1298 	vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1299 
1300 	void *addr, *newaddr;
1301 	vm_size_t count, count_needed;
1302 
1303 	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1304 		return KERN_INVALID_ARGUMENT;
1305 	}
1306 
1307 	task_count_cur = 0;
1308 	task_count_needed = 0;
1309 	task_list = NULL;
1310 	actual_tasks = 0;
1311 
1312 	thread_count_cur = 0;
1313 	thread_count_needed = 0;
1314 	thread_list = NULL;
1315 	actual_threads = 0;
1316 
1317 	for (;;) {
1318 		lck_mtx_lock(&tasks_threads_lock);
1319 
1320 		/* do we have the memory we need? */
1321 		if (type == PSET_THING_THREAD) {
1322 			thread_count_needed = threads_count;
1323 		}
1324 #if !CONFIG_MACF
1325 		else
1326 #endif
1327 		task_count_needed = tasks_count;
1328 
1329 		if (task_count_needed <= task_count_cur &&
1330 		    thread_count_needed <= thread_count_cur) {
1331 			break;
1332 		}
1333 
1334 		/* unlock and allocate more memory */
1335 		lck_mtx_unlock(&tasks_threads_lock);
1336 
1337 		/* grow task array */
1338 		if (task_count_needed > task_count_cur) {
1339 			kfree_type(task_t, task_count_cur, task_list);
1340 			assert(task_count_needed > 0);
1341 			task_count_cur = task_count_needed;
1342 
1343 			task_list = kalloc_type(task_t, task_count_cur, Z_WAITOK | Z_ZERO);
1344 			if (task_list == NULL) {
1345 				kfree_type(thread_t, thread_count_cur, thread_list);
1346 				return KERN_RESOURCE_SHORTAGE;
1347 			}
1348 		}
1349 
1350 		/* grow thread array */
1351 		if (thread_count_needed > thread_count_cur) {
1352 			kfree_type(thread_t, thread_count_cur, thread_list);
1353 
1354 			assert(thread_count_needed > 0);
1355 			thread_count_cur = thread_count_needed;
1356 
1357 			thread_list = kalloc_type(thread_t, thread_count_cur, Z_WAITOK | Z_ZERO);
1358 			if (thread_list == NULL) {
1359 				kfree_type(task_t, task_count_cur, task_list);
1360 				return KERN_RESOURCE_SHORTAGE;
1361 			}
1362 		}
1363 	}
1364 
1365 	/* OK, have memory and the list locked */
1366 
1367 	/* If we need it, get the thread list */
1368 	if (type == PSET_THING_THREAD) {
1369 		queue_iterate(&threads, thread, thread_t, threads) {
1370 			task = get_threadtask(thread);
1371 #if defined(SECURE_KERNEL)
1372 			if (task == kernel_task) {
1373 				/* skip threads belonging to kernel_task */
1374 				continue;
1375 			}
1376 #endif
1377 			if (!task->ipc_active || task_is_exec_copy(task)) {
1378 				/* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
1379 				continue;
1380 			}
1381 
1382 			thread_reference(thread);
1383 			thread_list[actual_threads++] = thread;
1384 		}
1385 	}
1386 #if !CONFIG_MACF
1387 	else
1388 #endif
1389 	{
1390 		/* get a list of the tasks */
1391 		queue_iterate(&tasks, task, task_t, tasks) {
1392 #if defined(SECURE_KERNEL)
1393 			if (task == kernel_task) {
1394 				/* skip kernel_task */
1395 				continue;
1396 			}
1397 #endif
1398 			if (!task->ipc_active || task_is_exec_copy(task)) {
1399 				/* skip inactive tasks (in the middle of exec/fork/spawn) */
1400 				continue;
1401 			}
1402 
1403 			task_reference(task);
1404 			task_list[actual_tasks++] = task;
1405 		}
1406 	}
1407 
1408 	lck_mtx_unlock(&tasks_threads_lock);
1409 
1410 #if CONFIG_MACF
1411 	unsigned int j, used;
1412 
1413 	/* for each task, make sure we are allowed to examine it */
1414 	for (i = used = 0; i < actual_tasks; i++) {
1415 		if (mac_task_check_expose_task(task_list[i], flavor)) {
1416 			task_deallocate(task_list[i]);
1417 			continue;
1418 		}
1419 		task_list[used++] = task_list[i];
1420 	}
1421 	actual_tasks = used;
1422 	task_count_needed = actual_tasks;
1423 
1424 	if (type == PSET_THING_THREAD) {
1425 		/* for each thread (if any), make sure it's task is in the allowed list */
1426 		for (i = used = 0; i < actual_threads; i++) {
1427 			boolean_t found_task = FALSE;
1428 
1429 			task = get_threadtask(thread_list[i]);
1430 			for (j = 0; j < actual_tasks; j++) {
1431 				if (task_list[j] == task) {
1432 					found_task = TRUE;
1433 					break;
1434 				}
1435 			}
1436 			if (found_task) {
1437 				thread_list[used++] = thread_list[i];
1438 			} else {
1439 				thread_deallocate(thread_list[i]);
1440 			}
1441 		}
1442 		actual_threads = used;
1443 		thread_count_needed = actual_threads;
1444 
1445 		/* done with the task list */
1446 		for (i = 0; i < actual_tasks; i++) {
1447 			task_deallocate(task_list[i]);
1448 		}
1449 		kfree_type(task_t, task_count_cur, task_list);
1450 		task_count_cur = 0;
1451 		actual_tasks = 0;
1452 		task_list = NULL;
1453 	}
1454 #endif
1455 
1456 	if (type == PSET_THING_THREAD) {
1457 		if (actual_threads == 0) {
1458 			/* no threads available to return */
1459 			assert(task_count_cur == 0);
1460 			kfree_type(thread_t, thread_count_cur, thread_list);
1461 			*thing_list = NULL;
1462 			*countp = 0;
1463 			return KERN_SUCCESS;
1464 		}
1465 		count_needed = actual_threads;
1466 		count = thread_count_cur;
1467 		addr = thread_list;
1468 	} else {
1469 		if (actual_tasks == 0) {
1470 			/* no tasks available to return */
1471 			assert(thread_count_cur == 0);
1472 			kfree_type(task_t, task_count_cur, task_list);
1473 			*thing_list = NULL;
1474 			*countp = 0;
1475 			return KERN_SUCCESS;
1476 		}
1477 		count_needed = actual_tasks;
1478 		count = task_count_cur;
1479 		addr = task_list;
1480 	}
1481 
1482 	/* if we allocated too much, must copy */
1483 	if (count_needed < count) {
1484 		newaddr = kalloc_type(void *, count_needed, Z_WAITOK | Z_ZERO);
1485 		if (newaddr == 0) {
1486 			for (i = 0; i < actual_tasks; i++) {
1487 				if (type == PSET_THING_THREAD) {
1488 					thread_deallocate(thread_list[i]);
1489 				} else {
1490 					task_deallocate(task_list[i]);
1491 				}
1492 			}
1493 			kfree_type(void *, count, addr);
1494 			return KERN_RESOURCE_SHORTAGE;
1495 		}
1496 
1497 		bcopy(addr, newaddr, count_needed * sizeof(void *));
1498 		kfree_type(void *, count, addr);
1499 
1500 		addr = newaddr;
1501 		count = count_needed;
1502 	}
1503 
1504 	*thing_list = (void **)addr;
1505 	*countp = (mach_msg_type_number_t)count;
1506 
1507 	return KERN_SUCCESS;
1508 }
1509 
1510 /*
1511  *	processor_set_tasks:
1512  *
1513  *	List all tasks in the processor set.
1514  */
1515 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1516 processor_set_tasks_internal(
1517 	processor_set_t         pset,
1518 	task_array_t            *task_list,
1519 	mach_msg_type_number_t  *count,
1520 	mach_task_flavor_t      flavor)
1521 {
1522 	kern_return_t ret;
1523 	mach_msg_type_number_t i;
1524 
1525 	ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
1526 	if (ret != KERN_SUCCESS) {
1527 		return ret;
1528 	}
1529 
1530 	/* do the conversion that Mig should handle */
1531 	switch (flavor) {
1532 	case TASK_FLAVOR_CONTROL:
1533 		for (i = 0; i < *count; i++) {
1534 			if ((*task_list)[i] == current_task()) {
1535 				/* if current_task(), return pinned port */
1536 				(*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
1537 			} else {
1538 				(*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
1539 			}
1540 		}
1541 		break;
1542 	case TASK_FLAVOR_READ:
1543 		for (i = 0; i < *count; i++) {
1544 			(*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
1545 		}
1546 		break;
1547 	case TASK_FLAVOR_INSPECT:
1548 		for (i = 0; i < *count; i++) {
1549 			(*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
1550 		}
1551 		break;
1552 	case TASK_FLAVOR_NAME:
1553 		for (i = 0; i < *count; i++) {
1554 			(*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
1555 		}
1556 		break;
1557 	default:
1558 		return KERN_INVALID_ARGUMENT;
1559 	}
1560 
1561 	return KERN_SUCCESS;
1562 }
1563 
1564 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1565 processor_set_tasks(
1566 	processor_set_t         pset,
1567 	task_array_t            *task_list,
1568 	mach_msg_type_number_t  *count)
1569 {
1570 	return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1571 }
1572 
1573 /*
1574  *	processor_set_tasks_with_flavor:
1575  *
1576  *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
1577  */
1578 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1579 processor_set_tasks_with_flavor(
1580 	processor_set_t         pset,
1581 	mach_task_flavor_t      flavor,
1582 	task_array_t            *task_list,
1583 	mach_msg_type_number_t  *count)
1584 {
1585 	switch (flavor) {
1586 	case TASK_FLAVOR_CONTROL:
1587 	case TASK_FLAVOR_READ:
1588 	case TASK_FLAVOR_INSPECT:
1589 	case TASK_FLAVOR_NAME:
1590 		return processor_set_tasks_internal(pset, task_list, count, flavor);
1591 	default:
1592 		return KERN_INVALID_ARGUMENT;
1593 	}
1594 }
1595 
1596 /*
1597  *	processor_set_threads:
1598  *
1599  *	List all threads in the processor set.
1600  */
1601 #if defined(SECURE_KERNEL)
1602 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1603 processor_set_threads(
1604 	__unused processor_set_t                pset,
1605 	__unused thread_array_t         *thread_list,
1606 	__unused mach_msg_type_number_t *count)
1607 {
1608 	return KERN_FAILURE;
1609 }
1610 #elif !defined(XNU_TARGET_OS_OSX)
1611 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1612 processor_set_threads(
1613 	__unused processor_set_t                pset,
1614 	__unused thread_array_t         *thread_list,
1615 	__unused mach_msg_type_number_t *count)
1616 {
1617 	return KERN_NOT_SUPPORTED;
1618 }
1619 #else
1620 kern_return_t
processor_set_threads(processor_set_t pset,thread_array_t * thread_list,mach_msg_type_number_t * count)1621 processor_set_threads(
1622 	processor_set_t         pset,
1623 	thread_array_t          *thread_list,
1624 	mach_msg_type_number_t  *count)
1625 {
1626 	kern_return_t ret;
1627 	mach_msg_type_number_t i;
1628 
1629 	ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1630 	if (ret != KERN_SUCCESS) {
1631 		return ret;
1632 	}
1633 
1634 	/* do the conversion that Mig should handle */
1635 	for (i = 0; i < *count; i++) {
1636 		(*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
1637 	}
1638 	return KERN_SUCCESS;
1639 }
1640 #endif
1641 
/*
 * recommended_pset_type:
 *
 * Map a thread to the cluster type (E or P pset) the scheduler should
 * prefer for it on AMP systems.  On non-AMP builds this is always
 * PSET_SMP.
 */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
	/* No thread context: default to the E cluster. */
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	/* Debug override: honor an explicit per-task pset-hint cluster type. */
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	/* Debug override: force all threads onto the E cluster. */
	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* Threads explicitly bound to a cluster follow that cluster's type. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/*
	 * Low-priority bands may be restricted to the E cluster by the
	 * perfcontrol class policies (see
	 * sched_perfcontrol_inherit_recommendation_from_tg below).
	 */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise, follow the thread group's cluster recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel threads go to E, others to P. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else
	(void)thread;
	return PSET_SMP;
#endif
}
1699 
#if CONFIG_THREAD_GROUPS && __AMP__

/*
 * Select whether threads of the given perfcontrol class follow their
 * thread group's cluster recommendation (inherit) or are restricted to
 * the E cluster.  The stored policies are consulted by
 * recommended_pset_type() above.
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

	/* Trace the policy change on the scheduler AMP kdebug channel. */
	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	switch (perfctl_class) {
	case PERFCONTROL_CLASS_UTILITY:
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
		break;
	case PERFCONTROL_CLASS_BACKGROUND:
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
		break;
	default:
		panic("perfctl_class invalid");
		break;
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */
1731