xref: /xnu-12377.81.4/osfmk/kern/processor.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 /*
60  *	processor.c: processor and processor_set manipulation routines.
61  */
62 
63 #include <kern/processor.h>
64 
65 #if !SCHED_TEST_HARNESS
66 
67 #include <mach/boolean.h>
68 #include <mach/policy.h>
69 #include <mach/processor.h>
70 #include <mach/processor_info.h>
71 #include <mach/vm_param.h>
72 #include <kern/bits.h>
73 #include <kern/cpu_number.h>
74 #include <kern/host.h>
75 #include <kern/ipc_host.h>
76 #include <kern/ipc_tt.h>
77 #include <kern/kalloc.h>
78 #include <kern/kern_types.h>
79 #include <kern/machine.h>
80 #include <kern/misc_protos.h>
81 #include <kern/sched.h>
82 #include <kern/smr.h>
83 #include <kern/task.h>
84 #include <kern/thread.h>
85 #include <kern/timer.h>
86 #if KPERF
87 #include <kperf/kperf.h>
88 #endif /* KPERF */
89 #include <ipc/ipc_port.h>
90 #include <machine/commpage.h>
91 
92 #include <security/mac_mach_internal.h>
93 
94 #if defined(CONFIG_XNUPOST)
95 
96 #include <tests/xnupost.h>
97 
98 #endif /* CONFIG_XNUPOST */
99 
100 /*
101  * Exported interface
102  */
103 #include <mach/mach_host_server.h>
104 #include <mach/processor_set_server.h>
105 #include <san/kcov.h>
106 
107 #endif /* !SCHED_TEST_HARNESS */
108 
109 
110 #if __AMP__
111 
112 /*
113  * For AMP platforms, all psets of the same type are part of
114  * the same pset_node. This allows for easier CPU selection logic.
115  */
116 struct pset_node            pset_nodes[MAX_AMP_CLUSTER_TYPES];
117 static int                  next_pset_node_index = 1;
118 static pset_node_t          pset_nodes_by_cluster_type[MAX_AMP_CLUSTER_TYPES];
119 
/*
 * Register 'node' as the pset_node servicing the given AMP cluster type.
 * Cluster types index the table with a -1 bias (PSET_SMP is excluded from
 * this table; see pset_node_for_pset_cluster_type).  Each cluster type may
 * be registered at most once — re-registration trips the assert.
 */
static void
pset_node_set_for_pset_cluster_type(pset_node_t node, pset_cluster_type_t pset_cluster_type)
{
	assert3p(pset_nodes_by_cluster_type[pset_cluster_type - 1], ==, PSET_NODE_NULL);
	pset_nodes_by_cluster_type[pset_cluster_type - 1] = node;
}
126 
/*
 * Look up the pset_node for an AMP cluster type.
 * Returns PSET_NODE_NULL if no node of that type has been created yet
 * (callers such as pset_create() use that to decide to create one).
 * Acquire ordering pairs with the registration store so a non-NULL
 * result is a fully published node.
 */
pset_node_t
pset_node_for_pset_cluster_type(pset_cluster_type_t pset_cluster_type)
{
	assert3u(pset_cluster_type, !=, PSET_SMP);
	return os_atomic_load(&pset_nodes_by_cluster_type[pset_cluster_type - 1], acquire);
}
133 
134 #else /* !__AMP__ */
135 
136 /* The boot node */
137 struct pset_node        pset_node0;
138 
139 #endif /* !__AMP__ */
140 
141 /* The boot pset */
142 SECURITY_READ_ONLY_LATE(processor_set_t) sched_boot_pset = PROCESSOR_SET_NULL;
143 
144 #if !SCHED_TEST_HARNESS
145 
146 LCK_GRP_DECLARE(pset_lck_grp, "pset");
147 
148 queue_head_t            tasks;
149 queue_head_t            terminated_tasks;       /* To be used ONLY for stackshot. */
150 queue_head_t            corpse_tasks;
151 int                     tasks_count;
152 int                     terminated_tasks_count;
153 queue_head_t            threads;
154 queue_head_t            terminated_threads;
155 int                     threads_count;
156 int                     terminated_threads_count;
157 LCK_GRP_DECLARE(task_lck_grp, "task");
158 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
159 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
160 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
161 
162 #endif /* !SCHED_TEST_HARNESS */
163 
164 processor_t             processor_list;
165 unsigned int            processor_count;
166 static processor_t      processor_list_tail;
167 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
168 SIMPLE_LOCK_DECLARE(processor_start_state_lock, 0);
169 
170 uint32_t                processor_avail_count;
171 uint32_t                processor_avail_count_user;
172 #if CONFIG_SCHED_SMT
173 uint32_t                primary_processor_avail_count_user;
174 #endif /* CONFIG_SCHED_SMT */
175 
176 #if XNU_SUPPORT_BOOTCPU_SHUTDOWN
177 TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", true);
178 #else
179 TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", false);
180 #endif
181 
182 #if __x86_64__ || XNU_ENABLE_PROCESSOR_EXIT
183 TUNABLE(bool, enable_processor_exit, "processor_exit", true);
184 #else
185 TUNABLE(bool, enable_processor_exit, "processor_exit", false);
186 #endif
187 
188 SECURITY_READ_ONLY_LATE(int)    master_cpu = 0;
189 
190 processor_t             processor_array[MAX_SCHED_CPUS] = { 0 };
191 processor_set_t         pset_array[MAX_PSETS] = { 0 };
192 struct processor_set    pset_array_actual[MAX_PSETS] = { 0 };
193 
/*
 * Validated variant of pset_for_id(): asserts that 'id' is within the
 * configured pset range and that the target pset has been initialized
 * before returning it.  Use this on paths where an out-of-range or
 * not-yet-created pset indicates a programming error.
 */
processor_set_t
pset_for_id_checked(pset_id_t id)
{
#if __AMP__
	/* sched_num_psets only exists on AMP platforms, but it should be valid
	 * before accessing pset_array entries. */
	assert3u(sched_num_psets, >, 0);
	assert3u(sched_num_psets, <=, MAX_PSETS);
	assert3u(id, <, sched_num_psets);
#else /* !__AMP__ */
	assert3u(id, <, MAX_PSETS);
#endif /* __AMP__ */
	assert(pset_array[id] != PROCESSOR_SET_NULL); /* check if pset is initialized */
	return pset_for_id(id);
}
209 
210 #if !SCHED_TEST_HARNESS
211 
212 struct processor        PERCPU_DATA(processor);
213 static timer_call_func_t running_timer_funcs[] = {
214 	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
215 	[RUNNING_TIMER_PREEMPT] = thread_preempt_expire,
216 	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
217 	[RUNNING_TIMER_PERFCONTROL] = perfcontrol_timer_expire,
218 };
219 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
220     == RUNNING_TIMER_MAX, "missing running timer function");
221 
222 #if defined(CONFIG_XNUPOST)
223 kern_return_t ipi_test(void);
224 extern void arm64_ipi_test(void);
225 
/*
 * XNUPOST kernel test: exercise the arm64 IPI path from every CPU.
 * Binds the calling thread to each processor in turn (thread_block()
 * forces the migration to take effect) and fires the test IPI from
 * there.  On non-arm64 platforms the test is a no-op pass.
 */
kern_return_t
ipi_test()
{
#if __arm64__
	processor_t p;

	for (p = processor_list; p != NULL; p = p->processor_list) {
		/* Migrate onto CPU 'p' before running the IPI test there. */
		thread_bind(p);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", p->cpu_id);
		arm64_ipi_test();
	}

	/* unbind thread from specific cpu */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
251 #endif /* defined(CONFIG_XNUPOST) */
252 
253 int sched_enable_smt = 1;
254 
255 #endif /* !SCHED_TEST_HARNESS */
256 
257 cpumap_t processor_offline_state_map[PROCESSOR_OFFLINE_MAX];
258 
259 void
processor_update_offline_state_locked(processor_t processor,processor_offline_state_t new_state)260 processor_update_offline_state_locked(processor_t processor,
261     processor_offline_state_t new_state)
262 {
263 	simple_lock_assert(&sched_available_cores_lock, LCK_ASSERT_OWNED);
264 
265 	processor_offline_state_t old_state = processor->processor_offline_state;
266 
267 	uint cpuid = (uint)processor->cpu_id;
268 
269 	assert(old_state < PROCESSOR_OFFLINE_MAX);
270 	assert(new_state < PROCESSOR_OFFLINE_MAX);
271 
272 	processor->processor_offline_state = new_state;
273 
274 	bit_clear(processor_offline_state_map[old_state], cpuid);
275 	bit_set(processor_offline_state_map[new_state], cpuid);
276 }
277 
278 void
processor_update_offline_state(processor_t processor,processor_offline_state_t new_state)279 processor_update_offline_state(processor_t processor,
280     processor_offline_state_t new_state)
281 {
282 	spl_t s = splsched();
283 	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
284 	processor_update_offline_state_locked(processor, new_state);
285 	simple_unlock(&sched_available_cores_lock);
286 	splx(s);
287 }
288 
/*
 * Early boot: set up the boot pset/pset node and the global task/thread
 * queues, then initialize the master processor.  Runs before any other
 * processor is registered.
 */
void
processor_bootstrap(void)
{
	simple_lock_init(&sched_available_cores_lock, 0);
	simple_lock_init(&processor_start_state_lock, 0);

	/* Initialize boot pset and node */
#if __AMP__
	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	const ml_topology_info_t *topology_info = ml_get_topology_info();
	assert3u(topology_info->num_clusters, <=, MAX_PSETS);
	sched_num_psets = (uint8_t)topology_info->num_clusters;
	assert3u(sched_num_psets, >, 0);
	assert3u(sched_num_psets, <=, MAX_PSETS);
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset_cluster_type_t boot_cluster_type = cluster_type_to_pset_cluster_type(boot_cluster->cluster_type);
	assert3u(boot_cluster->cluster_id, <, sched_num_psets);
	/* The boot pset is the statically-allocated slot for the boot cluster. */
	sched_boot_pset = &pset_array_actual[boot_cluster->cluster_id]; /* makes sched_boot_pset work */
	sched_boot_pset->pset_id = boot_cluster->cluster_id;
	sched_boot_pset->pset_cluster_id = boot_cluster->cluster_id;
	pset_node0.pset_cluster_type = boot_cluster_type;
	sched_boot_pset->pset_cluster_type = boot_cluster_type;
	pset_node_set_for_pset_cluster_type(&pset_node0, boot_cluster_type);
#else /* !__AMP__ */
	/* SMP: a single boot pset with id 0 on the boot node. */
	sched_boot_pset = &pset_array_actual[0]; /* makes sched_boot_pset work */
	sched_boot_pset->pset_id = 0;
	sched_boot_pset->pset_cluster_id = 0;
	pset_node0.pset_cluster_type = PSET_SMP;
	sched_boot_pset->pset_cluster_type = PSET_SMP;
#endif /* !__AMP__ */

	pset_init(sched_boot_pset, &pset_node0);
#if !SCHED_TEST_HARNESS
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);
#endif /* !SCHED_TEST_HARNESS */

	processor_init(master_processor, master_cpu, sched_boot_pset);
}
334 
335 /*
336  *	Initialize the given processor for the cpu
337  *	indicated by cpu_id, and assign to the
338  *	specified processor set.
339  */
340 void
processor_init(processor_t processor,int cpu_id,processor_set_t pset)341 processor_init(
342 	processor_t            processor,
343 	int                    cpu_id,
344 	processor_set_t        pset)
345 {
346 	spl_t           s;
347 
348 	assert(cpu_id < MAX_SCHED_CPUS);
349 	processor->cpu_id = cpu_id;
350 
351 	if (processor != master_processor) {
352 		/* Scheduler state for master_processor initialized in sched_init() */
353 		SCHED(processor_init)(processor);
354 		smr_cpu_init(processor);
355 	}
356 
357 	processor->state = PROCESSOR_OFF_LINE;
358 	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
359 	processor->processor_set = pset;
360 	processor_state_update_idle(processor);
361 	processor->starting_pri = MINPRI;
362 	processor->quantum_end = UINT64_MAX;
363 	processor->deadline = UINT64_MAX;
364 	processor->first_timeslice = FALSE;
365 	processor->processor_online = false;
366 #if CONFIG_SCHED_SMT
367 	processor->processor_primary = processor; /* no SMT relationship known at this point */
368 	processor->processor_secondary = NULL;
369 	processor->is_SMT = false;
370 #endif /* CONFIG_SCHED_SMT */
371 	processor->processor_self = IP_NULL;
372 	processor->processor_list = NULL;
373 	processor->must_idle = false;
374 	processor->next_idle_short = false;
375 	processor->last_startup_reason = REASON_SYSTEM;
376 	processor->last_shutdown_reason = REASON_NONE;
377 	processor->shutdown_temporary = false;
378 	processor->processor_inshutdown = false;
379 	processor->processor_instartup = false;
380 	processor->last_derecommend_reason = REASON_NONE;
381 #if !SCHED_TEST_HARNESS
382 	processor->running_timers_active = false;
383 	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
384 		timer_call_setup(&processor->running_timers[i],
385 		    running_timer_funcs[i], processor);
386 		running_timer_clear(processor, i);
387 	}
388 	recount_processor_init(processor);
389 #endif /* !SCHED_TEST_HARNESS */
390 
391 #if CONFIG_SCHED_EDGE
392 	os_atomic_init(&processor->stir_the_pot_inbox_cpu, -1);
393 #endif /* CONFIG_SCHED_EDGE */
394 
395 	s = splsched();
396 	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
397 
398 	pset_lock(pset);
399 	bit_set(pset->cpu_bitmask, cpu_id);
400 	bit_set(pset->recommended_bitmask, cpu_id);
401 	atomic_bit_set(&pset->node->pset_recommended_map, pset->pset_id, memory_order_relaxed);
402 #if CONFIG_SCHED_SMT
403 	bit_set(pset->primary_map, cpu_id);
404 #endif /* CONFIG_SCHED_SMT */
405 	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
406 	if (pset->cpu_set_count++ == 0) {
407 		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
408 	} else {
409 		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
410 		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
411 	}
412 
413 	processor->last_recommend_reason = REASON_SYSTEM;
414 	sched_processor_change_mode_locked(processor, PCM_RECOMMENDED, true);
415 	pset_unlock(pset);
416 
417 	processor->processor_offline_state = PROCESSOR_OFFLINE_NOT_BOOTED;
418 	bit_set(processor_offline_state_map[processor->processor_offline_state], cpu_id);
419 
420 	if (processor == master_processor) {
421 		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);
422 	}
423 
424 	simple_unlock(&sched_available_cores_lock);
425 	splx(s);
426 
427 	simple_lock(&processor_list_lock, LCK_GRP_NULL);
428 	if (processor_list == NULL) {
429 		processor_list = processor;
430 	} else {
431 		processor_list_tail->processor_list = processor;
432 	}
433 	processor_list_tail = processor;
434 	processor_count++;
435 	simple_unlock(&processor_list_lock);
436 	processor_array[cpu_id] = processor;
437 }
438 
439 #if CONFIG_SCHED_SMT
440 bool system_is_SMT = false;
441 
442 void
processor_set_primary(processor_t processor,processor_t primary)443 processor_set_primary(
444 	processor_t             processor,
445 	processor_t             primary)
446 {
447 	assert(processor->processor_primary == primary || processor->processor_primary == processor);
448 	/* Re-adjust primary point for this (possibly) secondary processor */
449 	processor->processor_primary = primary;
450 
451 	assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
452 	if (primary != processor) {
453 		/* Link primary to secondary, assumes a 2-way SMT model
454 		 * We'll need to move to a queue if any future architecture
455 		 * requires otherwise.
456 		 */
457 		assert(processor->processor_secondary == NULL);
458 		primary->processor_secondary = processor;
459 		/* Mark both processors as SMT siblings */
460 		primary->is_SMT = TRUE;
461 		processor->is_SMT = TRUE;
462 
463 		if (!system_is_SMT) {
464 			system_is_SMT = true;
465 			sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
466 		}
467 
468 		processor_set_t pset = processor->processor_set;
469 		spl_t s = splsched();
470 		pset_lock(pset);
471 		if (!pset->is_SMT) {
472 			pset->is_SMT = true;
473 		}
474 		bit_clear(pset->primary_map, processor->cpu_id);
475 		pset_unlock(pset);
476 		splx(s);
477 	}
478 }
479 #endif /* CONFIG_SCHED_SMT */
480 
481 processor_set_t
processor_pset(processor_t processor)482 processor_pset(
483 	processor_t     processor)
484 {
485 	return processor->processor_set;
486 }
487 
/*
 * CPUs in this pset that are both available (online/idle or better)
 * and currently recommended for scheduling.
 */
cpumap_t
pset_available_cpumap(processor_set_t pset)
{
	return pset->cpu_available_map & pset->recommended_bitmask;
}
493 
494 #if CONFIG_SCHED_EDGE
495 
496 /* Returns the scheduling type for the pset */
/* Returns the scheduling type for the pset */
cluster_type_t
pset_type_for_id(uint32_t cluster_id)
{
	/* NOTE(review): no bounds/NULL check here — callers are expected to
	 * pass a valid, initialized cluster id (cf. pset_for_id_checked). */
	return pset_array[cluster_id]->pset_type;
}
502 
503 /*
504  * Processor foreign threads
505  *
506  * With the Edge scheduler, each pset maintains a bitmap of processors running threads
507  * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
508  * if its of a different type than its preferred cluster type (E/P). The bitmap should
509  * be updated every time a new thread is assigned to run on a processor. Cluster shared
510  * resource intensive threads are also not counted as foreign threads since these
511  * threads should not be rebalanced when running on non-preferred clusters.
512  *
513  * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
514  * for rebalancing.
515  */
516 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)517 processor_state_update_running_foreign(processor_t processor, thread_t thread)
518 {
519 	cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
520 	cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
521 
522 	boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
523 	boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
524 	if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
525 		bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
526 	} else {
527 		bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
528 	}
529 }
530 
531 /*
532  * Cluster shared resource intensive threads
533  *
534  * With the Edge scheduler, each pset maintains a bitmap of processors running
535  * threads that are shared resource intensive. This per-thread property is set
536  * by the performance controller or explicitly via dispatch SPIs. The bitmap
537  * allows the Edge scheduler to calculate the cluster shared resource load on
538  * any given cluster and load balance intensive threads accordingly.
539  */
540 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)541 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
542 {
543 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
544 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
545 	} else {
546 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
547 	}
548 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
549 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
550 	} else {
551 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
552 	}
553 }
554 
555 #endif /* CONFIG_SCHED_EDGE */
556 
/*
 * Reset this processor's "currently running" snapshot to idle values.
 * Called when the CPU goes idle (and from processor_init).  Also clears
 * the edge scheduler's per-CPU tracking and refreshes the pset load
 * average.
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = false;
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* TH_BUCKET_SCHED_MAX marks "nothing tracked running" for this CPU. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
	sched_edge_stir_the_pot_clear_registry_entry();
#endif /* CONFIG_SCHED_EDGE */
	SCHED(update_pset_load_average)(processor->processor_set, 0);
}
581 
/*
 * Refresh this processor's "currently running" snapshot from the thread
 * going on-core.  pset_lock_held tells us whether it is safe to update
 * the pset load average (which requires the pset lock).
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	/* NOTE: processor_state_update_running_foreign reads current_pri,
	 * so it must run after the assignment above. */
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	sched_edge_stir_the_pot_update_registry_state(thread);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = thread_no_smt(thread);
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		SCHED(update_pset_load_average)(processor->processor_set, 0);
	}
}
612 
/*
 * Return the root (boot) pset node.  On AMP builds pset_node0 is
 * presumably an alias for the first entry of pset_nodes — TODO confirm
 * against the header, since only non-AMP builds declare it here.
 */
pset_node_t
pset_node_root(void)
{
	return &pset_node0;
}
618 
619 #if __AMP__
620 
621 /*
622  * Only need to dynamically initialize pset nodes when
623  * there are multiple cluster types.
624  */
/*
 * Only need to dynamically initialize pset nodes when
 * there are multiple cluster types.
 */
static pset_node_t
pset_node_create(cluster_type_t cluster_type)
{
	assert3u(cluster_type, !=, CLUSTER_TYPE_SMP);
	pset_cluster_type_t pset_cluster_type = cluster_type_to_pset_cluster_type(cluster_type);
	assert3u(next_pset_node_index, <, MAX_AMP_CLUSTER_TYPES);
	pset_node_t node = &pset_nodes[next_pset_node_index++];
	node->psets = PROCESSOR_SET_NULL;
	node->pset_cluster_type = pset_cluster_type;
	/* Insert into node linked list */
	/* next_pset_node_index was already advanced above, so -2 is the
	 * previously-created node (the list tail). */
	pset_nodes[next_pset_node_index - 2].node_list = node;
	pset_node_set_for_pset_cluster_type(node, pset_cluster_type);

	return node;
}
640 
641 #endif /* __AMP__*/
642 
643 processor_set_t
pset_create(cluster_type_t cluster_type,uint32_t pset_cluster_id,int pset_id)644 pset_create(
645 	cluster_type_t cluster_type,
646 	uint32_t pset_cluster_id,
647 	int      pset_id)
648 {
649 	/* some schedulers do not support multiple psets */
650 	if (SCHED(multiple_psets_enabled) == FALSE) {
651 		return processor_pset(master_processor);
652 	}
653 
654 	pset_node_t node;
655 	pset_cluster_type_t pset_cluster_type;
656 #if __AMP__
657 	pset_cluster_type = cluster_type_to_pset_cluster_type(cluster_type);
658 	node = pset_node_for_pset_cluster_type(pset_cluster_type);
659 	if (node == PSET_NODE_NULL) {
660 		/* First pset of this cluster type */
661 		node = pset_node_create(cluster_type);
662 	}
663 #else /* !__AMP__ */
664 	pset_cluster_type = PSET_SMP;
665 	node = &pset_node0;
666 	(void)cluster_type;
667 #endif /* !__AMP__ */
668 
669 	assert3u(pset_id, <, MAX_PSETS);
670 	assert3p(pset_array[pset_id], ==, PROCESSOR_SET_NULL);
671 	processor_set_t pset = &pset_array_actual[pset_id];
672 	pset->pset_cluster_type = pset_cluster_type;
673 	pset->pset_cluster_id = pset_cluster_id;
674 	pset->pset_id = pset_id;
675 	pset_init(pset, node);
676 
677 	return pset;
678 }
679 
680 /*
681  *	Initialize the given processor_set structure.
682  */
/*
 *	Initialize the given processor_set structure.
 *	Zeroes all bookkeeping, initializes the pset lock, runs the
 *	scheduler's per-pset init (except for the boot pset, which is
 *	handled in sched_init()), publishes the pset in pset_array, and
 *	links it onto its node's pset list.
 */
void
pset_init(
	processor_set_t         pset,
	pset_node_t                     node)
{
	pset->online_processor_count = 0;
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
	bzero(&pset->pset_runnable_depth, sizeof(pset->pset_runnable_depth));
#elif __AMP__
	pset->load_average = 0;
#endif /* !CONFIG_SCHED_EDGE && __AMP__ */
	pset->cpu_set_low = pset->cpu_set_hi = 0;
	pset->cpu_set_count = 0;
	pset->last_chosen = -1;
	pset->cpu_bitmask = 0;
	pset->recommended_bitmask = 0;
#if CONFIG_SCHED_SMT
	pset->primary_map = 0;
#endif /* CONFIG_SCHED_SMT */
	pset->realtime_map = 0;
	pset->cpu_available_map = 0;

	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
		pset->cpu_state_map[i] = 0;
	}
	pset->pending_AST_URGENT_cpu_mask = 0;
	pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	pset->pending_deferred_AST_cpu_mask = 0;
#endif
	pset->pending_spill_cpu_mask = 0;
	pset->rt_pending_spill_cpu_mask = 0;
	pset_lock_init(pset);
	pset->pset_self = IP_NULL;
	pset->pset_name_self = IP_NULL;
	pset->pset_list = PROCESSOR_SET_NULL;
#if CONFIG_SCHED_SMT
	pset->is_SMT = false;
#endif /* CONFIG_SCHED_SMT */
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
	pset->cpu_running_foreign = 0;
	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
	}
#endif /* CONFIG_SCHED_EDGE */

	/*
	 * No initial preferences or forced migrations, so use the least numbered
	 * available idle core when picking amongst idle cores in a cluster.
	 */
	pset->perfcontrol_cpu_preferred_bitmask = 0;
	pset->perfcontrol_cpu_migration_bitmask = 0;
	pset->cpu_preferred_last_chosen = -1;

	if (pset != sched_boot_pset) {
		/*
		 * Scheduler runqueue initialization for non-boot psets.
		 * This initialization for the boot pset happens in sched_init().
		 */
		SCHED(pset_init)(pset);
		SCHED(rt_init_pset)(pset);
	}

	/* Psets are initialized before any other processor starts running. */
	pset_array[pset->pset_id] =  pset;

	/* Initialize pset node state regarding this pset */
	bit_set(node->pset_map, pset->pset_id);
	pset->node = node;

	/* Append to the tail of the node's pset list. */
	processor_set_t *prev = &node->psets;
	while (*prev != PROCESSOR_SET_NULL) {
		prev = &(*prev)->pset_list;
	}
	*prev = pset;
}
762 
763 #if !SCHED_TEST_HARNESS
764 
765 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)766 processor_info_count(
767 	processor_flavor_t              flavor,
768 	mach_msg_type_number_t  *count)
769 {
770 	switch (flavor) {
771 	case PROCESSOR_BASIC_INFO:
772 		*count = PROCESSOR_BASIC_INFO_COUNT;
773 		break;
774 
775 	case PROCESSOR_CPU_LOAD_INFO:
776 		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
777 		break;
778 
779 	default:
780 		return cpu_info_count(flavor, count);
781 	}
782 
783 	return KERN_SUCCESS;
784 }
785 
/*
 * Accumulate this processor's user/system/idle time into 'ticks',
 * expressed in scheduler tick units (hz_tick_interval).  Values are
 * added to the caller-provided array, not overwritten — callers zero
 * the array first (see processor_info).
 */
void
processor_cpu_load_info(processor_t processor,
    natural_t ticks[static CPU_STATE_MAX])
{
	struct recount_usage usage = { 0 };
	uint64_t idle_time = 0;
	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);

	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_metrics[RCT_LVL_USER].rm_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_SYSTEM] += (uint32_t)(
		recount_usage_system_time_mach(&usage) / hz_tick_interval);
	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
}
800 
/*
 * MIG handler for processor_info(): fill the caller's buffer for the
 * requested flavor.  *count is in/out — callers pass the capacity and
 * get back the number of natural_t elements written.  Unknown flavors
 * are forwarded to the platform's cpu_info().
 */
kern_return_t
processor_info(
	processor_t     processor,
	processor_flavor_t              flavor,
	host_t                                  *host,
	processor_info_t                info,
	mach_msg_type_number_t  *count)
{
	int     cpu_id, state;
	kern_return_t   result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t          basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/*
		 * Report "not running" only for a genuinely offline CPU;
		 * temporary shutdowns still count as running.  On x86 a
		 * derecommended CPU also reports not running.
		 */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t       cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* processor_cpu_load_info() accumulates, so zero first. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
884 
885 #endif /* !SCHED_TEST_HARNESS */
886 
/*
 * Move a processor to a new scheduler state, keeping the owning pset's
 * per-state CPU bitmaps — and, where needed, the node-level pset hint
 * maps — consistent with the change.
 *
 * Caller must hold the pset lock; `processor` must belong to `pset`
 * and both states must be valid PROCESSOR_* values.
 */
void
pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
{
	pset_assert_locked(pset);

	uint old_state = processor->state;
	uint cpuid = (uint)processor->cpu_id;

	assert(processor->processor_set == pset);
	assert(bit_test(pset->cpu_bitmask, cpuid));

	assert(old_state < PROCESSOR_STATE_LEN);
	assert(new_state < PROCESSOR_STATE_LEN);

	processor->state = new_state;

	/* Migrate the CPU's bit from the old state's map to the new one. */
	bit_clear(pset->cpu_state_map[old_state], cpuid);
	bit_set(pset->cpu_state_map[new_state], cpuid);

	/* States >= PROCESSOR_IDLE count as available for scheduling. */
	if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) {
		/* No longer available for scheduling */
		bit_clear(pset->cpu_available_map, cpuid);
	} else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) {
		/* Newly available for scheduling */
		bit_set(pset->cpu_available_map, cpuid);
	}

	if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
		SCHED(update_pset_load_average)(pset, 0);
		if (new_state == PROCESSOR_RUNNING) {
			/* Only the CPU itself may transition into RUNNING. */
			assert(processor == current_processor());
		}
	}
	if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
		if (new_state == PROCESSOR_IDLE) {
			/* An idle CPU is no longer running a realtime thread. */
			bit_clear(pset->realtime_map, cpuid);
		}

		pset_node_t node = pset->node;

		if (bit_count(node->pset_map) == 1) {
			/* Node has only a single pset, so skip node pset map updates */
			return;
		}

		if (new_state == PROCESSOR_IDLE) {
			/*
			 * Publish this pset in the node-level idle/non-RT
			 * hint maps.  Test before setting to avoid redundant
			 * atomic RMWs on shared cache lines.
			 */
#if CONFIG_SCHED_SMT
			if (processor->processor_primary == processor) {
				if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
					atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
				}
			}
#endif /* CONFIG_SCHED_SMT */
			if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
			}
			if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
			}
		} else {
			cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
			if (idle_map == 0) {
				/* No more IDLE CPUs */
				if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
					atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
				}
			}
		}
	}
}
957 
958 #if !SCHED_TEST_HARNESS
959 
/*
 * Now that we're enforcing all CPUs actually boot, we may need a way to
 * relax the timeout.
 */
TUNABLE(uint32_t, cpu_boot_timeout_secs, "cpu_boot_timeout_secs", 1); /* seconds, default to 1 second */

/*
 * Human-readable description of each processor_start_kind_t, used in
 * the timeout panic message in processor_wait_for_start().
 */
static const char *
    processor_start_panic_strings[] = {
	[PROCESSOR_FIRST_BOOT]                  = "boot for the first time",
	[PROCESSOR_BEFORE_ENTERING_SLEEP]       = "come online while entering system sleep",
	[PROCESSOR_WAKE_FROM_SLEEP]             = "come online after returning from system sleep",
	[PROCESSOR_CLUSTER_POWERDOWN_SUSPEND]   = "come online while disabling cluster powerdown",
	[PROCESSOR_CLUSTER_POWERDOWN_RESUME]    = "come online before enabling cluster powerdown",
	[PROCESSOR_POWERED_CORES_CHANGE]        = "come online during dynamic cluster power state change",
};
975 
/*
 * Block until `processor` finishes a previously requested startup
 * (i.e. processor_instartup clears), then advance its offline state to
 * RUNNING if this is the first waiter to observe the started CPU.
 *
 * Panics if the CPU does not come up within cpu_boot_timeout_secs
 * (scaled by the debug performance-degradation factor); `start_kind`
 * selects the panic message describing which transition failed.
 */
void
processor_wait_for_start(processor_t processor, processor_start_kind_t start_kind)
{
	if (!processor->processor_booted) {
		panic("processor_boot() missing for cpu %d", processor->cpu_id);
	}

	/* Stretch the timeout when debugging artificially slows CPUs down. */
	uint32_t boot_timeout_extended = cpu_boot_timeout_secs *
	    debug_cpu_performance_degradation_factor;

	spl_t s = splsched();
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	while (processor->processor_instartup) {
		/* Re-check the flag under the lock after every wakeup. */
		assert_wait_timeout((event_t)&processor->processor_instartup,
		    THREAD_UNINT, boot_timeout_extended, NSEC_PER_SEC);
		simple_unlock(&processor_start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("cpu %d failed to %s, waited %d seconds\n",
			    processor->cpu_id,
			    processor_start_panic_strings[start_kind],
			    boot_timeout_extended);
		}

		s = splsched();
		simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	}

	if (processor->processor_inshutdown) {
		panic("%s>cpu %d still in shutdown",
		    __func__, processor->cpu_id);
	}

	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	if (!processor->processor_online) {
		panic("%s>cpu %d not online",
		    __func__, processor->cpu_id);
	}

	/* The first waiter advances the offline state machine to RUNNING. */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);
}
1029 
/* Serializes processor up/down transitions (held across processor_dostartup/processor_doshutdown). */
LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);
1032 
/*
 * Power on and start a processor without waiting for it to come up.
 *
 * Drives the offline state machine NOT_BOOTED/FULLY_OFFLINE ->
 * STARTING -> STARTED_NOT_RUNNING (unless the target CPU has already
 * advanced itself to STARTED_NOT_WAITED), and moves the scheduler
 * state to PROCESSOR_START.  `first_boot` is true only the very first
 * time the CPU is brought up, before processor_booted is set.
 *
 * Callers must hold cluster_powerdown_lock and processor_updown_lock.
 */
static void
processor_dostartup(
	processor_t     processor,
	bool            first_boot)
{
	if (!processor->processor_booted && !first_boot) {
		panic("processor %d not booted", processor->cpu_id);
	}

	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	processor_set_t pset = processor->processor_set;

	assert(processor->processor_self);

	spl_t s = splsched();

	/* Flip in-shutdown -> in-startup under the start-state lock. */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_inshutdown || first_boot);
	processor->processor_inshutdown = false;
	assert(processor->processor_instartup == false);
	processor->processor_instartup = true;
	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	pset_lock(pset);

	if (first_boot) {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);

	assert(processor->state == PROCESSOR_OFF_LINE);

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	pset_unlock(pset);

	simple_unlock(&sched_available_cores_lock);

	splx(s);

	/* Actually power the core on and kick off its boot. */
	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);

	cpu_start(processor->cpu_id);

	s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	/*
	 * The target CPU may already have advanced the state itself; only
	 * move STARTING -> STARTED_NOT_RUNNING if it has not.
	 */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTING) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTED_NOT_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);

	ml_cpu_end_state_transition(processor->cpu_id);
	/*
	 * Note: Because the actual wait-for-start happens sometime later,
	 * this races with processor_up calling CPU_BOOTED.
	 * To fix that, this should happen after the first wait for start
	 * confirms the CPU has booted.
	 */
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
}
1106 
/*
 * Shut down a processor, recording `reason` as the cause.
 *
 * `is_system_sleep` marks a full system-sleep shutdown; when the
 * target is the master processor this becomes the final shutdown
 * before sleep.  During system sleep only REASON_SYSTEM is legal, and
 * the master processor may only be shut down for REASON_SYSTEM unless
 * support_bootcpu_shutdown is set.
 */
void
processor_exit_reason(processor_t processor, processor_reason_t reason, bool is_system_sleep)
{
	assert(processor);
	assert(processor->processor_set);

	lck_mtx_lock(&processor_updown_lock);

	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	assert((processor != master_processor) || (reason == REASON_SYSTEM) || support_bootcpu_shutdown);

	processor->last_shutdown_reason = reason;

	bool is_final_system_sleep = is_system_sleep && (processor == master_processor);

	processor_doshutdown(processor, is_final_system_sleep);

	lck_mtx_unlock(&processor_updown_lock);
}
1129 
1130 /*
1131  * Called `processor_exit` in Unsupported KPI.
1132  * AppleARMCPU and AppleACPIPlatform call this in response to haltCPU().
1133  *
1134  * Behavior change: on both platforms, now xnu does the processor_sleep,
1135  * and ignores processor_exit calls from kexts.
1136  */
1137 kern_return_t
processor_exit_from_kext(__unused processor_t processor)1138 processor_exit_from_kext(
1139 	__unused processor_t processor)
1140 {
1141 	/* This is a no-op now. */
1142 	return KERN_FAILURE;
1143 }
1144 
1145 void
processor_sleep(processor_t processor)1146 processor_sleep(
1147 	processor_t     processor)
1148 {
1149 	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
1150 
1151 	processor_exit_reason(processor, REASON_SYSTEM, true);
1152 }
1153 
1154 kern_return_t
processor_exit_from_user(processor_t processor)1155 processor_exit_from_user(
1156 	processor_t     processor)
1157 {
1158 	if (processor == PROCESSOR_NULL) {
1159 		return KERN_INVALID_ARGUMENT;
1160 	}
1161 
1162 	kern_return_t result;
1163 
1164 	lck_mtx_lock(&cluster_powerdown_lock);
1165 
1166 	result = sched_processor_exit_user(processor);
1167 
1168 	lck_mtx_unlock(&cluster_powerdown_lock);
1169 
1170 	return result;
1171 }
1172 
/*
 * Start a previously booted processor, recording `reason` as the
 * cause.  During system sleep only REASON_SYSTEM startups are legal.
 * Does not wait for the CPU to come up (see processor_wait_for_start).
 */
void
processor_start_reason(processor_t processor, processor_reason_t reason)
{
	lck_mtx_lock(&processor_updown_lock);

	assert(processor);
	assert(processor->processor_set);
	assert(processor->processor_booted);

	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	processor->last_startup_reason = reason;

	processor_dostartup(processor, false);

	lck_mtx_unlock(&processor_updown_lock);
}
1192 
1193 /*
1194  * Called `processor_start` in Unsupported KPI.
1195  * AppleARMCPU calls this to boot processors.
1196  * AppleACPIPlatform expects ml_processor_register to call processor_boot.
1197  *
1198  * Behavior change: now ml_processor_register also boots CPUs on ARM, and xnu
1199  * ignores processor_start calls from kexts.
1200  */
1201 kern_return_t
processor_start_from_kext(__unused processor_t processor)1202 processor_start_from_kext(
1203 	__unused processor_t processor)
1204 {
1205 	/* This is a no-op now. */
1206 	return KERN_FAILURE;
1207 }
1208 
1209 kern_return_t
processor_start_from_user(processor_t processor)1210 processor_start_from_user(
1211 	processor_t                     processor)
1212 {
1213 	if (processor == PROCESSOR_NULL) {
1214 		return KERN_INVALID_ARGUMENT;
1215 	}
1216 
1217 	kern_return_t result;
1218 
1219 	lck_mtx_lock(&cluster_powerdown_lock);
1220 
1221 	result = sched_processor_start_user(processor);
1222 
1223 	lck_mtx_unlock(&cluster_powerdown_lock);
1224 
1225 	return result;
1226 }
1227 
/*
 * Boot up a processor for the first time.
 *
 * This will also be called against the main processor during system boot,
 * even though it's already running.
 */
void
processor_boot(
	processor_t                     processor)
{
	lck_mtx_lock(&cluster_powerdown_lock);
	lck_mtx_lock(&processor_updown_lock);

	/* First boot happens before sleep or full CPU init are possible. */
	assert(!sched_is_in_sleep());
	assert(!sched_is_cpu_init_completed());

	if (processor->processor_booted) {
		panic("processor %d already booted", processor->cpu_id);
	}

	/* The master processor is already up when it reaches this point. */
	if (processor == master_processor) {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
	}

	/*
	 *	Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		idle_thread_create(processor, processor_start_thread);
	}

	/* Set up the processor's IPC port if it doesn't have one yet. */
	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	if (processor == master_processor) {
		processor->last_startup_reason = REASON_SYSTEM;

		ml_cpu_power_enable(processor->cpu_id);

		/* Bind here so the remaining sequence runs on the master CPU itself. */
		processor_t prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		cpu_start(processor->cpu_id);

		assert(processor->state == PROCESSOR_RUNNING);
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);

		thread_bind(prev);
	} else {
		processor->last_startup_reason = REASON_SYSTEM;

		/*
		 * We don't wait for startup to finish, so all CPUs can start
		 * in parallel.
		 */
		processor_dostartup(processor, true);
	}

	processor->processor_booted = true;

	lck_mtx_unlock(&processor_updown_lock);
	lck_mtx_unlock(&cluster_powerdown_lock);
}
1294 
1295 /*
1296  * Wake a previously booted processor from a temporarily powered off state.
1297  */
1298 void
processor_wake(processor_t processor)1299 processor_wake(
1300 	processor_t                     processor)
1301 {
1302 	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
1303 
1304 	assert(processor->processor_booted);
1305 	processor_start_reason(processor, REASON_SYSTEM);
1306 }
1307 
1308 #if CONFIG_SCHED_SMT
/*
 * Enable or disable all secondary (SMT sibling) logical processors.
 *
 * After requesting the state change for each secondary CPU, the final
 * logical-CPU counts reported by host_info() are compared against the
 * expected values to decide overall success.
 *
 * NOTE(review): the per-CPU processor_start_from_user() /
 * processor_exit_from_user() return values are ignored here; only the
 * aggregate host_info() check below detects a CPU that failed to
 * change state — confirm this is intentional.
 */
kern_return_t
enable_smt_processors(bool enable)
{
	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
		/* Not an SMT system */
		return KERN_INVALID_ARGUMENT;
	}

	int ncpus = machine_info.logical_cpu_max;

	for (int i = 1; i < ncpus; i++) {
		processor_t processor = processor_array[i];

		/* Only secondaries (CPUs whose primary is a different CPU) are toggled. */
		if (processor->processor_primary != processor) {
			if (enable) {
				processor_start_from_user(processor);
			} else { /* Disable */
				processor_exit_from_user(processor);
			}
		}
	}

#define BSD_HOST 1
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	if (kret != KERN_SUCCESS) {
		return kret;
	}

	/* When enabling, every logical CPU should now be up. */
	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
		return KERN_FAILURE;
	}

	/* When disabling, only one logical CPU per physical core should be up. */
	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
1349 #endif /* CONFIG_SCHED_SMT */
1350 
1351 bool
processor_should_kprintf(processor_t processor,bool starting)1352 processor_should_kprintf(processor_t processor, bool starting)
1353 {
1354 	processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1355 
1356 	return reason != REASON_CLPC_SYSTEM;
1357 }
1358 
1359 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1360 processor_control(
1361 	processor_t             processor,
1362 	processor_info_t        info,
1363 	mach_msg_type_number_t  count)
1364 {
1365 	if (processor == PROCESSOR_NULL) {
1366 		return KERN_INVALID_ARGUMENT;
1367 	}
1368 
1369 	return cpu_control(processor->cpu_id, info, count);
1370 }
1371 
1372 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1373 processor_get_assignment(
1374 	processor_t     processor,
1375 	processor_set_t *pset)
1376 {
1377 	int state;
1378 
1379 	if (processor == PROCESSOR_NULL) {
1380 		return KERN_INVALID_ARGUMENT;
1381 	}
1382 
1383 	state = processor->state;
1384 	if (state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1385 		return KERN_FAILURE;
1386 	}
1387 
1388 	*pset = sched_boot_pset;
1389 
1390 	return KERN_SUCCESS;
1391 }
1392 
/*
 * Return information about a processor set.  On success *host is set
 * to the real host port and *count to the number of integers written;
 * the incoming *count is the capacity of the info buffer.  The legacy
 * scheduling-policy flavors all report fixed defaults/limits.
 */
kern_return_t
processor_set_info(
	processor_set_t         pset,
	int                     flavor,
	host_t                  *host,
	processor_set_info_t    info,
	mach_msg_type_number_t  *count)
{
	if (pset == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flavor == PROCESSOR_SET_BASIC_INFO) {
		processor_set_basic_info_t      basic_info;

		if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_set_basic_info_t) info;
		/* x86_64 reports the user-visible available-CPU count. */
#if defined(__x86_64__)
		basic_info->processor_count = processor_avail_count_user;
#else
		basic_info->processor_count = processor_avail_count;
#endif
		basic_info->default_policy = POLICY_TIMESHARE;

		*count = PROCESSOR_SET_BASIC_INFO_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
		policy_timeshare_base_t ts_base;

		if (*count < POLICY_TIMESHARE_BASE_COUNT) {
			return KERN_FAILURE;
		}

		ts_base = (policy_timeshare_base_t) info;
		ts_base->base_priority = BASEPRI_DEFAULT;

		*count = POLICY_TIMESHARE_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
		policy_fifo_base_t              fifo_base;

		if (*count < POLICY_FIFO_BASE_COUNT) {
			return KERN_FAILURE;
		}

		fifo_base = (policy_fifo_base_t) info;
		fifo_base->base_priority = BASEPRI_DEFAULT;

		*count = POLICY_FIFO_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
		policy_rr_base_t                rr_base;

		if (*count < POLICY_RR_BASE_COUNT) {
			return KERN_FAILURE;
		}

		rr_base = (policy_rr_base_t) info;
		rr_base->base_priority = BASEPRI_DEFAULT;
		rr_base->quantum = 1;

		*count = POLICY_RR_BASE_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
		policy_timeshare_limit_t        ts_limit;

		if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		ts_limit = (policy_timeshare_limit_t) info;
		ts_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_TIMESHARE_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
		policy_fifo_limit_t             fifo_limit;

		if (*count < POLICY_FIFO_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		fifo_limit = (policy_fifo_limit_t) info;
		fifo_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_FIFO_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_RR_LIMITS) {
		policy_rr_limit_t               rr_limit;

		if (*count < POLICY_RR_LIMIT_COUNT) {
			return KERN_FAILURE;
		}

		rr_limit = (policy_rr_limit_t) info;
		rr_limit->max_priority = MAXPRI_KERNEL;

		*count = POLICY_RR_LIMIT_COUNT;
		*host = &realhost;
		return KERN_SUCCESS;
	} else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
		int                             *enabled;

		if (*count < (sizeof(*enabled) / sizeof(int))) {
			return KERN_FAILURE;
		}

		enabled = (int *) info;
		*enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;

		*count = sizeof(*enabled) / sizeof(int);
		*host = &realhost;
		return KERN_SUCCESS;
	}


	*host = HOST_NULL;
	return KERN_INVALID_ARGUMENT;
}
1521 
1522 /*
1523  *	processor_set_statistics
1524  *
1525  *	Returns scheduling statistics for a processor set.
1526  */
1527 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1528 processor_set_statistics(
1529 	processor_set_t         pset,
1530 	int                     flavor,
1531 	processor_set_info_t    info,
1532 	mach_msg_type_number_t  *count)
1533 {
1534 	if (pset == PROCESSOR_SET_NULL || pset != sched_boot_pset) {
1535 		return KERN_INVALID_PROCESSOR_SET;
1536 	}
1537 
1538 	if (flavor == PROCESSOR_SET_LOAD_INFO) {
1539 		processor_set_load_info_t     load_info;
1540 
1541 		if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1542 			return KERN_FAILURE;
1543 		}
1544 
1545 		load_info = (processor_set_load_info_t) info;
1546 
1547 		load_info->mach_factor = sched_mach_factor;
1548 		load_info->load_average = sched_load_average;
1549 
1550 		load_info->task_count = tasks_count;
1551 		load_info->thread_count = threads_count;
1552 
1553 		*count = PROCESSOR_SET_LOAD_INFO_COUNT;
1554 		return KERN_SUCCESS;
1555 	}
1556 
1557 	return KERN_INVALID_ARGUMENT;
1558 }
1559 
1560 /*
1561  *	processor_set_things:
1562  *
1563  *	Common internals for processor_set_{threads,tasks}
1564  */
1565 static kern_return_t
processor_set_things(processor_set_t pset,mach_port_array_t * thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1566 processor_set_things(
1567 	processor_set_t         pset,
1568 	mach_port_array_t      *thing_list,
1569 	mach_msg_type_number_t *countp,
1570 	int                     type,
1571 	mach_task_flavor_t      flavor)
1572 {
1573 	unsigned int i;
1574 	task_t task;
1575 	thread_t thread;
1576 
1577 	mach_port_array_t task_addr;
1578 	task_t *task_list;
1579 	vm_size_t actual_tasks, task_count_cur, task_count_needed;
1580 
1581 	mach_port_array_t thread_addr;
1582 	thread_t *thread_list;
1583 	vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1584 
1585 	mach_port_array_t addr, newaddr;
1586 	vm_size_t count, count_needed;
1587 
1588 	if (pset == PROCESSOR_SET_NULL || pset != sched_boot_pset) {
1589 		return KERN_INVALID_ARGUMENT;
1590 	}
1591 
1592 	task_count_cur = 0;
1593 	task_count_needed = 0;
1594 	task_list = NULL;
1595 	task_addr = NULL;
1596 	actual_tasks = 0;
1597 
1598 	thread_count_cur = 0;
1599 	thread_count_needed = 0;
1600 	thread_list = NULL;
1601 	thread_addr = NULL;
1602 	actual_threads = 0;
1603 
1604 	for (;;) {
1605 		lck_mtx_lock(&tasks_threads_lock);
1606 
1607 		/* do we have the memory we need? */
1608 		if (type == PSET_THING_THREAD) {
1609 			thread_count_needed = threads_count;
1610 		}
1611 #if !CONFIG_MACF
1612 		else
1613 #endif
1614 		task_count_needed = tasks_count;
1615 
1616 		if (task_count_needed <= task_count_cur &&
1617 		    thread_count_needed <= thread_count_cur) {
1618 			break;
1619 		}
1620 
1621 		/* unlock and allocate more memory */
1622 		lck_mtx_unlock(&tasks_threads_lock);
1623 
1624 		/* grow task array */
1625 		if (task_count_needed > task_count_cur) {
1626 			mach_port_array_free(task_addr, task_count_cur);
1627 			assert(task_count_needed > 0);
1628 			task_count_cur = task_count_needed;
1629 
1630 			task_addr = mach_port_array_alloc(task_count_cur,
1631 			    Z_WAITOK | Z_ZERO);
1632 			if (task_addr == NULL) {
1633 				mach_port_array_free(thread_addr, thread_count_cur);
1634 				return KERN_RESOURCE_SHORTAGE;
1635 			}
1636 			task_list = (task_t *)task_addr;
1637 		}
1638 
1639 		/* grow thread array */
1640 		if (thread_count_needed > thread_count_cur) {
1641 			mach_port_array_free(thread_addr, thread_count_cur);
1642 			assert(thread_count_needed > 0);
1643 			thread_count_cur = thread_count_needed;
1644 
1645 			thread_addr = mach_port_array_alloc(thread_count_cur,
1646 			    Z_WAITOK | Z_ZERO);
1647 			if (thread_addr == NULL) {
1648 				mach_port_array_free(task_addr, task_count_cur);
1649 				return KERN_RESOURCE_SHORTAGE;
1650 			}
1651 			thread_list = (thread_t *)thread_addr;
1652 		}
1653 	}
1654 
1655 	/* OK, have memory and the list locked */
1656 
1657 	/* If we need it, get the thread list */
1658 	if (type == PSET_THING_THREAD) {
1659 		queue_iterate(&threads, thread, thread_t, threads) {
1660 			task = get_threadtask(thread);
1661 #if defined(SECURE_KERNEL)
1662 			if (task == kernel_task) {
1663 				/* skip threads belonging to kernel_task */
1664 				continue;
1665 			}
1666 #endif
1667 			if (!task->ipc_active || task_is_exec_copy(task)) {
1668 				/* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
1669 				continue;
1670 			}
1671 
1672 			thread_reference(thread);
1673 			thread_list[actual_threads++] = thread;
1674 		}
1675 	}
1676 #if !CONFIG_MACF
1677 	else
1678 #endif
1679 	{
1680 		/* get a list of the tasks */
1681 		queue_iterate(&tasks, task, task_t, tasks) {
1682 #if defined(SECURE_KERNEL)
1683 			if (task == kernel_task) {
1684 				/* skip kernel_task */
1685 				continue;
1686 			}
1687 #endif
1688 			if (!task->ipc_active || task_is_exec_copy(task)) {
1689 				/* skip inactive tasks (in the middle of exec/fork/spawn) */
1690 				continue;
1691 			}
1692 
1693 			task_reference(task);
1694 			task_list[actual_tasks++] = task;
1695 		}
1696 	}
1697 
1698 	lck_mtx_unlock(&tasks_threads_lock);
1699 
1700 #if CONFIG_MACF
1701 	unsigned int j, used;
1702 
1703 	/* for each task, make sure we are allowed to examine it */
1704 	for (i = used = 0; i < actual_tasks; i++) {
1705 		if (mac_task_check_expose_task(task_list[i], flavor)) {
1706 			task_deallocate(task_list[i]);
1707 			continue;
1708 		}
1709 		task_list[used++] = task_list[i];
1710 	}
1711 	actual_tasks = used;
1712 	task_count_needed = actual_tasks;
1713 
1714 	if (type == PSET_THING_THREAD) {
1715 		/* for each thread (if any), make sure it's task is in the allowed list */
1716 		for (i = used = 0; i < actual_threads; i++) {
1717 			boolean_t found_task = FALSE;
1718 
1719 			task = get_threadtask(thread_list[i]);
1720 			for (j = 0; j < actual_tasks; j++) {
1721 				if (task_list[j] == task) {
1722 					found_task = TRUE;
1723 					break;
1724 				}
1725 			}
1726 			if (found_task) {
1727 				thread_list[used++] = thread_list[i];
1728 			} else {
1729 				thread_deallocate(thread_list[i]);
1730 			}
1731 		}
1732 		actual_threads = used;
1733 		thread_count_needed = actual_threads;
1734 
1735 		/* done with the task list */
1736 		for (i = 0; i < actual_tasks; i++) {
1737 			task_deallocate(task_list[i]);
1738 		}
1739 		mach_port_array_free(task_addr, task_count_cur);
1740 		task_list = NULL;
1741 		task_count_cur = 0;
1742 		actual_tasks = 0;
1743 	}
1744 #endif
1745 
1746 	if (type == PSET_THING_THREAD) {
1747 		if (actual_threads == 0) {
1748 			/* no threads available to return */
1749 			assert(task_count_cur == 0);
1750 			mach_port_array_free(thread_addr, thread_count_cur);
1751 			thread_list = NULL;
1752 			*thing_list = NULL;
1753 			*countp = 0;
1754 			return KERN_SUCCESS;
1755 		}
1756 		count_needed = actual_threads;
1757 		count = thread_count_cur;
1758 		addr = thread_addr;
1759 	} else {
1760 		if (actual_tasks == 0) {
1761 			/* no tasks available to return */
1762 			assert(thread_count_cur == 0);
1763 			mach_port_array_free(task_addr, task_count_cur);
1764 			*thing_list = NULL;
1765 			*countp = 0;
1766 			return KERN_SUCCESS;
1767 		}
1768 		count_needed = actual_tasks;
1769 		count = task_count_cur;
1770 		addr = task_addr;
1771 	}
1772 
1773 	/* if we allocated too much, must copy */
1774 	if (count_needed < count) {
1775 		newaddr = mach_port_array_alloc(count_needed, Z_WAITOK | Z_ZERO);
1776 		if (newaddr == NULL) {
1777 			for (i = 0; i < actual_tasks; i++) {
1778 				if (type == PSET_THING_THREAD) {
1779 					thread_deallocate(thread_list[i]);
1780 				} else {
1781 					task_deallocate(task_list[i]);
1782 				}
1783 			}
1784 			mach_port_array_free(addr, count);
1785 			return KERN_RESOURCE_SHORTAGE;
1786 		}
1787 
1788 		bcopy(addr, newaddr, count_needed * sizeof(void *));
1789 		mach_port_array_free(addr, count);
1790 
1791 		addr = newaddr;
1792 		count = count_needed;
1793 	}
1794 
1795 	*thing_list = addr;
1796 	*countp = (mach_msg_type_number_t)count;
1797 
1798 	return KERN_SUCCESS;
1799 }
1800 
1801 /*
1802  *	processor_set_tasks:
1803  *
1804  *	List all tasks in the processor set.
1805  */
1806 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1807 processor_set_tasks_internal(
1808 	processor_set_t         pset,
1809 	task_array_t            *task_list,
1810 	mach_msg_type_number_t  *count,
1811 	mach_task_flavor_t      flavor)
1812 {
1813 	kern_return_t ret;
1814 
1815 	ret = processor_set_things(pset, task_list, count, PSET_THING_TASK, flavor);
1816 	if (ret != KERN_SUCCESS) {
1817 		return ret;
1818 	}
1819 
1820 	/* do the conversion that Mig should handle */
1821 	convert_task_array_to_ports(*task_list, *count, flavor);
1822 	return KERN_SUCCESS;
1823 }
1824 
1825 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1826 processor_set_tasks(
1827 	processor_set_t         pset,
1828 	task_array_t            *task_list,
1829 	mach_msg_type_number_t  *count)
1830 {
1831 	return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1832 }
1833 
1834 /*
1835  *	processor_set_tasks_with_flavor:
1836  *
1837  *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
1838  */
1839 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1840 processor_set_tasks_with_flavor(
1841 	processor_set_t         pset,
1842 	mach_task_flavor_t      flavor,
1843 	task_array_t            *task_list,
1844 	mach_msg_type_number_t  *count)
1845 {
1846 	switch (flavor) {
1847 	case TASK_FLAVOR_CONTROL:
1848 	case TASK_FLAVOR_READ:
1849 	case TASK_FLAVOR_INSPECT:
1850 	case TASK_FLAVOR_NAME:
1851 		return processor_set_tasks_internal(pset, task_list, count, flavor);
1852 	default:
1853 		return KERN_INVALID_ARGUMENT;
1854 	}
1855 }
1856 
1857 /*
1858  *	processor_set_threads:
1859  *
1860  *	List all threads in the processor set.
1861  */
1862 #if defined(SECURE_KERNEL)
1863 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1864 processor_set_threads(
1865 	__unused processor_set_t         pset,
1866 	__unused thread_act_array_t     *thread_list,
1867 	__unused mach_msg_type_number_t *count)
1868 {
1869 	return KERN_FAILURE;
1870 }
1871 #elif !defined(XNU_TARGET_OS_OSX)
1872 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1873 processor_set_threads(
1874 	__unused processor_set_t         pset,
1875 	__unused thread_act_array_t     *thread_list,
1876 	__unused mach_msg_type_number_t *count)
1877 {
1878 	return KERN_NOT_SUPPORTED;
1879 }
1880 #else
1881 kern_return_t
processor_set_threads(processor_set_t pset,thread_act_array_t * thread_list,mach_msg_type_number_t * count)1882 processor_set_threads(
1883 	processor_set_t         pset,
1884 	thread_act_array_t      *thread_list,
1885 	mach_msg_type_number_t  *count)
1886 {
1887 	kern_return_t ret;
1888 
1889 	ret = processor_set_things(pset, thread_list, count,
1890 	    PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1891 	if (ret != KERN_SUCCESS) {
1892 		return ret;
1893 	}
1894 
1895 	/* do the conversion that Mig should handle */
1896 	convert_thread_array_to_ports(*thread_list, *count, TASK_FLAVOR_CONTROL);
1897 	return KERN_SUCCESS;
1898 }
1899 #endif
1900 
1901 #endif /* !SCHED_TEST_HARNESS */
1902 
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
	/* Only used by the AMP scheduler policy */
#if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE
	/* No thread context: default to the efficiency cluster. */
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	/* Debug override: honor an explicit per-task pset hint, if one is set. */
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	/* Debug knob forcing the whole system onto E-cores. */
	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* A cluster-bound thread is recommended its bound cluster's type. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/*
	 * Background (<= MAXPRI_THROTTLE) and utility (<= BASEPRI_UTILITY)
	 * threads are restricted to E-cores unless the perfctl policy for
	 * that priority band says to follow the thread group instead.
	 */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise defer to the thread group's cluster recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel threads go to E, user threads to P. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
	(void)thread;
	return PSET_SMP;
#endif /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
}
1961 
1962 #if __arm64__
1963 
1964 cluster_type_t
pset_cluster_type_to_cluster_type(pset_cluster_type_t pset_cluster_type)1965 pset_cluster_type_to_cluster_type(pset_cluster_type_t pset_cluster_type)
1966 {
1967 	switch (pset_cluster_type) {
1968 #if __AMP__
1969 	case PSET_AMP_E:
1970 		return CLUSTER_TYPE_E;
1971 	case PSET_AMP_P:
1972 		return CLUSTER_TYPE_P;
1973 #endif /* __AMP__ */
1974 	case PSET_SMP:
1975 		return CLUSTER_TYPE_SMP;
1976 	default:
1977 		panic("Unexpected pset cluster type %d", pset_cluster_type);
1978 	}
1979 }
1980 
1981 pset_cluster_type_t
cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)1982 cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)
1983 {
1984 	switch (cluster_type) {
1985 #if __AMP__
1986 	case CLUSTER_TYPE_E:
1987 		return PSET_AMP_E;
1988 	case CLUSTER_TYPE_P:
1989 		return PSET_AMP_P;
1990 #endif /* __AMP__ */
1991 	case CLUSTER_TYPE_SMP:
1992 		return PSET_SMP;
1993 	default:
1994 		panic("Unexpected cluster type %d", cluster_type);
1995 	}
1996 }
1997 
1998 #endif /* __arm64__ */
1999 
#if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE

/*
 *	sched_perfcontrol_inherit_recommendation_from_tg:
 *
 *	Select whether the given perfcontrol class follows its thread
 *	group's cluster recommendation (inherit) or is restricted to
 *	E-cores. Only the utility and background classes are valid.
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t policy;

	policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

	/* Trace the policy change before publishing it. */
	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, policy, 0, 0);

	if (perfctl_class == PERFCONTROL_CLASS_UTILITY) {
		os_atomic_store(&sched_perfctl_policy_util, policy, relaxed);
	} else if (perfctl_class == PERFCONTROL_CLASS_BACKGROUND) {
		os_atomic_store(&sched_perfctl_policy_bg, policy, relaxed);
	} else {
		panic("perfctl_class invalid");
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */
2031