xref: /xnu-8020.101.4/osfmk/kern/processor.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 
59 /*
60  *	processor.c: processor and processor_set manipulation routines.
61  */
62 
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84 
85 #include <security/mac_mach_internal.h>
86 
87 #if defined(CONFIG_XNUPOST)
88 
89 #include <tests/xnupost.h>
90 
91 #endif /* CONFIG_XNUPOST */
92 
93 /*
94  * Exported interface
95  */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98 #include <san/kcov.h>
99 
100 /*
101  * The first pset and the pset_node are created by default for all platforms.
102  * Those typically represent the boot-cluster. For AMP platforms, all clusters
103  * of the same type are part of the same pset_node. This allows for easier
104  * CPU selection logic.
105  */
struct processor_set    pset0;          /* boot pset; set up by processor_bootstrap() */
struct pset_node        pset_node0;     /* root pset node; returned by pset_node_root() */

#if __AMP__
/*
 * Second pset node, used for the cluster type the boot cluster is not.
 * processor_bootstrap() points ecore_node and pcore_node at pset_node0 and
 * pset_node1 according to the boot cluster's type.
 */
struct pset_node        pset_node1;
pset_node_t             ecore_node;
pset_node_t             pcore_node;
#endif

/*
 * Held while walking or appending to a node's pset list
 * (pset_create(), pset_find()) and while updating a node's pset_map
 * (pset_init()).
 */
LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);

LCK_GRP_DECLARE(pset_lck_grp, "pset");

/* Global task and thread queues; the queue heads are initialized in processor_bootstrap(). */
queue_head_t            tasks;
queue_head_t            terminated_tasks;       /* To be used ONLY for stackshot. */
queue_head_t            corpse_tasks;
int                     tasks_count;
int                     terminated_tasks_count;
queue_head_t            threads;
queue_head_t            terminated_threads;
int                     threads_count;
int                     terminated_threads_count;
LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

/*
 * All processors, singly linked through processor->processor_list in the
 * order processor_init() was called. processor_init() appends at
 * processor_list_tail and bumps processor_count under processor_list_lock.
 */
processor_t             processor_list;
unsigned int            processor_count;
static processor_t      processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);

uint32_t                processor_avail_count;
uint32_t                processor_avail_count_user;
uint32_t                primary_processor_avail_count;
uint32_t                primary_processor_avail_count_user;

/* cpu_id that processor_bootstrap() hands to processor_init() for master_processor. */
SECURITY_READ_ONLY_LATE(int)    master_cpu = 0;

struct processor        PERCPU_DATA(processor);
/* cpu_id -> processor lookup; slots are filled by processor_init(). */
processor_t             processor_array[MAX_SCHED_CPUS] = { 0 };
/* pset_id -> pset lookup; slots are filled by pset_init(). */
processor_set_t         pset_array[MAX_PSETS] = { 0 };

/*
 * Expiry callbacks for the per-processor running timers, indexed by
 * running-timer id. processor_init() installs one timer call per entry.
 * The static_assert fails the build if an id is added without a callback.
 */
static timer_call_func_t running_timer_funcs[] = {
	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
155 
#if defined(CONFIG_XNUPOST)
extern void arm64_ipi_test(void);
kern_return_t ipi_test(void);

/*
 * XNUPOST test entry point.
 *
 * On arm64, visits every processor on processor_list: the calling thread
 * is bound to the processor, blocks, and then runs arm64_ipi_test().
 * Afterwards the binding is removed again. Other platforms simply report
 * a pass. Always returns KERN_SUCCESS.
 */
kern_return_t
ipi_test()
{
#if __arm64__
	processor_t cpu = processor_list;

	while (cpu != NULL) {
		/* Bind and block (presumably so the binding takes effect before the test runs). */
		thread_bind(cpu);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", cpu->cpu_id);
		arm64_ipi_test();

		cpu = cpu->processor_list;
	}

	/* Drop the binding to the last cpu visited. */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */
186 
/*
 * When zero, processor_start() does not bring SMT secondary processors
 * into service: it either skips starting them or starts them disabled at
 * the scheduler level.
 */
int sched_enable_smt = 1;
188 
189 void
processor_bootstrap(void)190 processor_bootstrap(void)
191 {
192 	/* Initialize PSET node and PSET associated with boot cluster */
193 	pset_node0.psets = &pset0;
194 	pset_node0.pset_cluster_type = PSET_SMP;
195 
196 #if __AMP__
197 	const ml_topology_info_t *topology_info = ml_get_topology_info();
198 
199 	/*
200 	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
201 	 * same kind of initialization done via ml_processor_register()
202 	 */
203 	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
204 	pset0.pset_id = boot_cluster->cluster_id;
205 	pset0.pset_cluster_id = boot_cluster->cluster_id;
206 	if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
207 		pset0.pset_cluster_type      = PSET_AMP_E;
208 		pset_node0.pset_cluster_type = PSET_AMP_E;
209 		ecore_node = &pset_node0;
210 
211 		pset_node1.pset_cluster_type = PSET_AMP_P;
212 		pcore_node = &pset_node1;
213 	} else {
214 		pset0.pset_cluster_type      = PSET_AMP_P;
215 		pset_node0.pset_cluster_type = PSET_AMP_P;
216 		pcore_node = &pset_node0;
217 
218 		pset_node1.pset_cluster_type = PSET_AMP_E;
219 		ecore_node = &pset_node1;
220 	}
221 
222 	/* Link pset_node1 to pset_node0 */
223 	pset_node0.node_list = &pset_node1;
224 #endif
225 
226 	pset_init(&pset0, &pset_node0);
227 	queue_init(&tasks);
228 	queue_init(&terminated_tasks);
229 	queue_init(&threads);
230 	queue_init(&terminated_threads);
231 	queue_init(&corpse_tasks);
232 
233 	processor_init(master_processor, master_cpu, &pset0);
234 }
235 
236 /*
237  *	Initialize the given processor for the cpu
238  *	indicated by cpu_id, and assign to the
239  *	specified processor set.
240  */
241 void
processor_init(processor_t processor,int cpu_id,processor_set_t pset)242 processor_init(
243 	processor_t            processor,
244 	int                    cpu_id,
245 	processor_set_t        pset)
246 {
247 	spl_t           s;
248 
249 	assert(cpu_id < MAX_SCHED_CPUS);
250 	processor->cpu_id = cpu_id;
251 
252 	if (processor != master_processor) {
253 		/* Scheduler state for master_processor initialized in sched_init() */
254 		SCHED(processor_init)(processor);
255 	}
256 
257 	processor->state = PROCESSOR_OFF_LINE;
258 	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
259 	processor->processor_set = pset;
260 	processor_state_update_idle(processor);
261 	processor->starting_pri = MINPRI;
262 	processor->quantum_end = UINT64_MAX;
263 	processor->deadline = UINT64_MAX;
264 	processor->first_timeslice = FALSE;
265 	processor->processor_offlined = false;
266 	processor->processor_primary = processor; /* no SMT relationship known at this point */
267 	processor->processor_secondary = NULL;
268 	processor->is_SMT = false;
269 	processor->is_recommended = true;
270 	processor->processor_self = IP_NULL;
271 	processor->processor_list = NULL;
272 	processor->must_idle = false;
273 	processor->running_timers_active = false;
274 	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
275 		timer_call_setup(&processor->running_timers[i],
276 		    running_timer_funcs[i], processor);
277 		running_timer_clear(processor, i);
278 	}
279 
280 	timer_init(&processor->idle_state);
281 	timer_init(&processor->system_state);
282 	timer_init(&processor->user_state);
283 
284 	s = splsched();
285 	pset_lock(pset);
286 	bit_set(pset->cpu_bitmask, cpu_id);
287 	bit_set(pset->recommended_bitmask, cpu_id);
288 	bit_set(pset->primary_map, cpu_id);
289 	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
290 	if (pset->cpu_set_count++ == 0) {
291 		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
292 	} else {
293 		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
294 		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
295 	}
296 	pset_unlock(pset);
297 	splx(s);
298 
299 	simple_lock(&processor_list_lock, LCK_GRP_NULL);
300 	if (processor_list == NULL) {
301 		processor_list = processor;
302 	} else {
303 		processor_list_tail->processor_list = processor;
304 	}
305 	processor_list_tail = processor;
306 	processor_count++;
307 	simple_unlock(&processor_list_lock);
308 	processor_array[cpu_id] = processor;
309 }
310 
/* Set once processor_set_primary() links the first secondary processor to a primary. */
bool system_is_SMT = false;
312 
313 void
processor_set_primary(processor_t processor,processor_t primary)314 processor_set_primary(
315 	processor_t             processor,
316 	processor_t             primary)
317 {
318 	assert(processor->processor_primary == primary || processor->processor_primary == processor);
319 	/* Re-adjust primary point for this (possibly) secondary processor */
320 	processor->processor_primary = primary;
321 
322 	assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
323 	if (primary != processor) {
324 		/* Link primary to secondary, assumes a 2-way SMT model
325 		 * We'll need to move to a queue if any future architecture
326 		 * requires otherwise.
327 		 */
328 		assert(processor->processor_secondary == NULL);
329 		primary->processor_secondary = processor;
330 		/* Mark both processors as SMT siblings */
331 		primary->is_SMT = TRUE;
332 		processor->is_SMT = TRUE;
333 
334 		if (!system_is_SMT) {
335 			system_is_SMT = true;
336 			sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
337 		}
338 
339 		processor_set_t pset = processor->processor_set;
340 		spl_t s = splsched();
341 		pset_lock(pset);
342 		if (!pset->is_SMT) {
343 			pset->is_SMT = true;
344 		}
345 		bit_clear(pset->primary_map, processor->cpu_id);
346 		pset_unlock(pset);
347 		splx(s);
348 	}
349 }
350 
351 processor_set_t
processor_pset(processor_t processor)352 processor_pset(
353 	processor_t     processor)
354 {
355 	return processor->processor_set;
356 }
357 
358 #if CONFIG_SCHED_EDGE
359 
360 cluster_type_t
pset_type_for_id(uint32_t cluster_id)361 pset_type_for_id(uint32_t cluster_id)
362 {
363 	return pset_array[cluster_id]->pset_type;
364 }
365 
366 /*
367  * Processor foreign threads
368  *
369  * With the Edge scheduler, each pset maintains a bitmap of processors running threads
370  * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
371  * if its of a different type than its preferred cluster type (E/P). The bitmap should
372  * be updated every time a new thread is assigned to run on a processor. Cluster shared
373  * resource intensive threads are also not counted as foreign threads since these
374  * threads should not be rebalanced when running on non-preferred clusters.
375  *
376  * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
377  * for rebalancing.
378  */
379 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)380 processor_state_update_running_foreign(processor_t processor, thread_t thread)
381 {
382 	cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
383 	cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
384 
385 	boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
386 	boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
387 	if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
388 		bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
389 	} else {
390 		bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
391 	}
392 }
393 
394 /*
395  * Cluster shared resource intensive threads
396  *
397  * With the Edge scheduler, each pset maintains a bitmap of processors running
398  * threads that are shared resource intensive. This per-thread property is set
399  * by the performance controller or explicitly via dispatch SPIs. The bitmap
400  * allows the Edge scheduler to calculate the cluster shared resource load on
401  * any given cluster and load balance intensive threads accordingly.
402  */
403 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)404 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
405 {
406 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
407 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
408 	} else {
409 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
410 	}
411 	if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
412 		bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
413 	} else {
414 		bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
415 	}
416 }
417 
418 #endif /* CONFIG_SCHED_EDGE */
419 
420 void
processor_state_update_idle(processor_t processor)421 processor_state_update_idle(processor_t processor)
422 {
423 	processor->current_pri = IDLEPRI;
424 	processor->current_sfi_class = SFI_CLASS_KERNEL;
425 	processor->current_recommended_pset_type = PSET_SMP;
426 #if CONFIG_THREAD_GROUPS
427 	processor->current_thread_group = NULL;
428 #endif
429 	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
430 	processor->current_urgency = THREAD_URGENCY_NONE;
431 	processor->current_is_NO_SMT = false;
432 	processor->current_is_bound = false;
433 	processor->current_is_eagerpreempt = false;
434 #if CONFIG_SCHED_EDGE
435 	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
436 	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
437 	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
438 #endif /* CONFIG_SCHED_EDGE */
439 	sched_update_pset_load_average(processor->processor_set, 0);
440 }
441 
442 void
processor_state_update_from_thread(processor_t processor,thread_t thread,boolean_t pset_lock_held)443 processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
444 {
445 	processor->current_pri = thread->sched_pri;
446 	processor->current_sfi_class = thread->sfi_class;
447 	processor->current_recommended_pset_type = recommended_pset_type(thread);
448 #if CONFIG_SCHED_EDGE
449 	processor_state_update_running_foreign(processor, thread);
450 	processor_state_update_running_cluster_shared_rsrc(processor, thread);
451 	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
452 	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
453 	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
454 #endif /* CONFIG_SCHED_EDGE */
455 
456 #if CONFIG_THREAD_GROUPS
457 	processor->current_thread_group = thread_group_get(thread);
458 #endif
459 	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
460 	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
461 	processor->current_is_NO_SMT = thread_no_smt(thread);
462 	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
463 	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
464 	if (pset_lock_held) {
465 		/* Only update the pset load average when the pset lock is held */
466 		sched_update_pset_load_average(processor->processor_set, 0);
467 	}
468 }
469 
470 void
processor_state_update_explicit(processor_t processor,int pri,sfi_class_id_t sfi_class,pset_cluster_type_t pset_type,perfcontrol_class_t perfctl_class,thread_urgency_t urgency,__unused sched_bucket_t bucket)471 processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
472     pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
473 {
474 	processor->current_pri = pri;
475 	processor->current_sfi_class = sfi_class;
476 	processor->current_recommended_pset_type = pset_type;
477 	processor->current_perfctl_class = perfctl_class;
478 	processor->current_urgency = urgency;
479 #if CONFIG_SCHED_EDGE
480 	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
481 	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
482 	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
483 #endif /* CONFIG_SCHED_EDGE */
484 }
485 
486 pset_node_t
pset_node_root(void)487 pset_node_root(void)
488 {
489 	return &pset_node0;
490 }
491 
492 processor_set_t
pset_create(pset_node_t node,pset_cluster_type_t pset_type,uint32_t pset_cluster_id,int pset_id)493 pset_create(
494 	pset_node_t node,
495 	pset_cluster_type_t pset_type,
496 	uint32_t pset_cluster_id,
497 	int      pset_id)
498 {
499 	/* some schedulers do not support multiple psets */
500 	if (SCHED(multiple_psets_enabled) == FALSE) {
501 		return processor_pset(master_processor);
502 	}
503 
504 	processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
505 
506 	if (pset != PROCESSOR_SET_NULL) {
507 		pset->pset_cluster_type = pset_type;
508 		pset->pset_cluster_id = pset_cluster_id;
509 		pset->pset_id = pset_id;
510 		pset_init(pset, node);
511 
512 		lck_spin_lock(&pset_node_lock);
513 
514 		prev = &node->psets;
515 		while (*prev != PROCESSOR_SET_NULL) {
516 			prev = &(*prev)->pset_list;
517 		}
518 
519 		*prev = pset;
520 
521 		lck_spin_unlock(&pset_node_lock);
522 	}
523 
524 	return pset;
525 }
526 
527 /*
528  *	Find processor set with specified cluster_id.
529  *	Returns default_pset if not found.
530  */
531 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)532 pset_find(
533 	uint32_t cluster_id,
534 	processor_set_t default_pset)
535 {
536 	lck_spin_lock(&pset_node_lock);
537 	pset_node_t node = &pset_node0;
538 	processor_set_t pset = NULL;
539 
540 	do {
541 		pset = node->psets;
542 		while (pset != NULL) {
543 			if (pset->pset_cluster_id == cluster_id) {
544 				break;
545 			}
546 			pset = pset->pset_list;
547 		}
548 	} while (pset == NULL && (node = node->node_list) != NULL);
549 	lck_spin_unlock(&pset_node_lock);
550 	if (pset == NULL) {
551 		return default_pset;
552 	}
553 	return pset;
554 }
555 
556 /*
557  *	Initialize the given processor_set structure.
558  */
559 void
pset_init(processor_set_t pset,pset_node_t node)560 pset_init(
561 	processor_set_t         pset,
562 	pset_node_t                     node)
563 {
564 	pset->online_processor_count = 0;
565 	pset->load_average = 0;
566 	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
567 	pset->cpu_set_low = pset->cpu_set_hi = 0;
568 	pset->cpu_set_count = 0;
569 	pset->last_chosen = -1;
570 	pset->cpu_bitmask = 0;
571 	pset->recommended_bitmask = 0;
572 	pset->primary_map = 0;
573 	pset->realtime_map = 0;
574 
575 	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
576 		pset->cpu_state_map[i] = 0;
577 	}
578 	pset->pending_AST_URGENT_cpu_mask = 0;
579 	pset->pending_AST_PREEMPT_cpu_mask = 0;
580 #if defined(CONFIG_SCHED_DEFERRED_AST)
581 	pset->pending_deferred_AST_cpu_mask = 0;
582 #endif
583 	pset->pending_spill_cpu_mask = 0;
584 	pset->rt_pending_spill_cpu_mask = 0;
585 	pset_lock_init(pset);
586 	pset->pset_self = IP_NULL;
587 	pset->pset_name_self = IP_NULL;
588 	pset->pset_list = PROCESSOR_SET_NULL;
589 	pset->is_SMT = false;
590 #if CONFIG_SCHED_EDGE
591 	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
592 	pset->cpu_running_foreign = 0;
593 	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
594 		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
595 		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
596 	}
597 #endif /* CONFIG_SCHED_EDGE */
598 	pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;
599 
600 	if (pset != &pset0) {
601 		/*
602 		 * Scheduler runqueue initialization for non-boot psets.
603 		 * This initialization for pset0 happens in sched_init().
604 		 */
605 		SCHED(pset_init)(pset);
606 		SCHED(rt_init)(pset);
607 	}
608 	pset_array[pset->pset_id] = pset;
609 	lck_spin_lock(&pset_node_lock);
610 	bit_set(node->pset_map, pset->pset_id);
611 	pset->node = node;
612 	lck_spin_unlock(&pset_node_lock);
613 }
614 
615 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)616 processor_info_count(
617 	processor_flavor_t              flavor,
618 	mach_msg_type_number_t  *count)
619 {
620 	switch (flavor) {
621 	case PROCESSOR_BASIC_INFO:
622 		*count = PROCESSOR_BASIC_INFO_COUNT;
623 		break;
624 
625 	case PROCESSOR_CPU_LOAD_INFO:
626 		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
627 		break;
628 
629 	default:
630 		return cpu_info_count(flavor, count);
631 	}
632 
633 	return KERN_SUCCESS;
634 }
635 
636 
637 kern_return_t
processor_info(processor_t processor,processor_flavor_t flavor,host_t * host,processor_info_t info,mach_msg_type_number_t * count)638 processor_info(
639 	processor_t     processor,
640 	processor_flavor_t              flavor,
641 	host_t                                  *host,
642 	processor_info_t                info,
643 	mach_msg_type_number_t  *count)
644 {
645 	int     cpu_id, state;
646 	kern_return_t   result;
647 
648 	if (processor == PROCESSOR_NULL) {
649 		return KERN_INVALID_ARGUMENT;
650 	}
651 
652 	cpu_id = processor->cpu_id;
653 
654 	switch (flavor) {
655 	case PROCESSOR_BASIC_INFO:
656 	{
657 		processor_basic_info_t          basic_info;
658 
659 		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
660 			return KERN_FAILURE;
661 		}
662 
663 		basic_info = (processor_basic_info_t) info;
664 		basic_info->cpu_type = slot_type(cpu_id);
665 		basic_info->cpu_subtype = slot_subtype(cpu_id);
666 		state = processor->state;
667 		if (state == PROCESSOR_OFF_LINE
668 #if defined(__x86_64__)
669 		    || !processor->is_recommended
670 #endif
671 		    ) {
672 			basic_info->running = FALSE;
673 		} else {
674 			basic_info->running = TRUE;
675 		}
676 		basic_info->slot_num = cpu_id;
677 		if (processor == master_processor) {
678 			basic_info->is_master = TRUE;
679 		} else {
680 			basic_info->is_master = FALSE;
681 		}
682 
683 		*count = PROCESSOR_BASIC_INFO_COUNT;
684 		*host = &realhost;
685 
686 		return KERN_SUCCESS;
687 	}
688 
689 	case PROCESSOR_CPU_LOAD_INFO:
690 	{
691 		processor_cpu_load_info_t       cpu_load_info;
692 		timer_t         idle_state;
693 		uint64_t        idle_time_snapshot1, idle_time_snapshot2;
694 		uint64_t        idle_time_tstamp1, idle_time_tstamp2;
695 
696 		/*
697 		 * We capture the accumulated idle time twice over
698 		 * the course of this function, as well as the timestamps
699 		 * when each were last updated. Since these are
700 		 * all done using non-atomic racy mechanisms, the
701 		 * most we can infer is whether values are stable.
702 		 * timer_grab() is the only function that can be
703 		 * used reliably on another processor's per-processor
704 		 * data.
705 		 */
706 
707 		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
708 			return KERN_FAILURE;
709 		}
710 
711 		cpu_load_info = (processor_cpu_load_info_t) info;
712 		if (precise_user_kernel_time) {
713 			cpu_load_info->cpu_ticks[CPU_STATE_USER] =
714 			    (uint32_t)(timer_grab(&processor->user_state) / hz_tick_interval);
715 			cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
716 			    (uint32_t)(timer_grab(&processor->system_state) / hz_tick_interval);
717 		} else {
718 			uint64_t tval = timer_grab(&processor->user_state) +
719 			    timer_grab(&processor->system_state);
720 
721 			cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
722 			cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
723 		}
724 
725 		idle_state = &processor->idle_state;
726 		idle_time_snapshot1 = timer_grab(idle_state);
727 		idle_time_tstamp1 = idle_state->tstamp;
728 
729 		/*
730 		 * Idle processors are not continually updating their
731 		 * per-processor idle timer, so it may be extremely
732 		 * out of date, resulting in an over-representation
733 		 * of non-idle time between two measurement
734 		 * intervals by e.g. top(1). If we are non-idle, or
735 		 * have evidence that the timer is being updated
736 		 * concurrently, we consider its value up-to-date.
737 		 */
738 		if (processor->current_state != idle_state) {
739 			cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
740 			    (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
741 		} else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
742 		    (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))) {
743 			/* Idle timer is being updated concurrently, second stamp is good enough */
744 			cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
745 			    (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
746 		} else {
747 			/*
748 			 * Idle timer may be very stale. Fortunately we have established
749 			 * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
750 			 */
751 			idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;
752 
753 			cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
754 			    (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
755 		}
756 
757 		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
758 
759 		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
760 		*host = &realhost;
761 
762 		return KERN_SUCCESS;
763 	}
764 
765 	default:
766 		result = cpu_info(flavor, cpu_id, info, count);
767 		if (result == KERN_SUCCESS) {
768 			*host = &realhost;
769 		}
770 
771 		return result;
772 	}
773 }
774 
775 kern_return_t
processor_start(processor_t processor)776 processor_start(
777 	processor_t                     processor)
778 {
779 	processor_set_t         pset;
780 	thread_t                        thread;
781 	kern_return_t           result;
782 	spl_t                           s;
783 
784 	if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
785 		return KERN_INVALID_ARGUMENT;
786 	}
787 
788 	if (processor == master_processor) {
789 		processor_t             prev;
790 
791 		prev = thread_bind(processor);
792 		thread_block(THREAD_CONTINUE_NULL);
793 
794 		result = cpu_start(processor->cpu_id);
795 
796 		thread_bind(prev);
797 
798 		return result;
799 	}
800 
801 	bool scheduler_disable = false;
802 
803 	if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
804 		if (cpu_can_exit(processor->cpu_id)) {
805 			return KERN_SUCCESS;
806 		}
807 		/*
808 		 * This secondary SMT processor must start in order to service interrupts,
809 		 * so instead it will be disabled at the scheduler level.
810 		 */
811 		scheduler_disable = true;
812 	}
813 
814 	ml_cpu_begin_state_transition(processor->cpu_id);
815 	s = splsched();
816 	pset = processor->processor_set;
817 	pset_lock(pset);
818 	if (processor->state != PROCESSOR_OFF_LINE) {
819 		pset_unlock(pset);
820 		splx(s);
821 		ml_cpu_end_state_transition(processor->cpu_id);
822 
823 		return KERN_FAILURE;
824 	}
825 
826 	pset_update_processor_state(pset, processor, PROCESSOR_START);
827 	pset_unlock(pset);
828 	splx(s);
829 
830 	/*
831 	 *	Create the idle processor thread.
832 	 */
833 	if (processor->idle_thread == THREAD_NULL) {
834 		result = idle_thread_create(processor);
835 		if (result != KERN_SUCCESS) {
836 			s = splsched();
837 			pset_lock(pset);
838 			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
839 			pset_unlock(pset);
840 			splx(s);
841 			ml_cpu_end_state_transition(processor->cpu_id);
842 
843 			return result;
844 		}
845 	}
846 
847 	/*
848 	 *	If there is no active thread, the processor
849 	 *	has never been started.  Create a dedicated
850 	 *	start up thread.
851 	 */
852 	if (processor->active_thread == THREAD_NULL &&
853 	    processor->startup_thread == THREAD_NULL) {
854 		result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
855 		if (result != KERN_SUCCESS) {
856 			s = splsched();
857 			pset_lock(pset);
858 			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
859 			pset_unlock(pset);
860 			splx(s);
861 			ml_cpu_end_state_transition(processor->cpu_id);
862 
863 			return result;
864 		}
865 
866 		s = splsched();
867 		thread_lock(thread);
868 		thread->bound_processor = processor;
869 		processor->startup_thread = thread;
870 		thread->state = TH_RUN;
871 		thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
872 		thread_unlock(thread);
873 		splx(s);
874 
875 		thread_deallocate(thread);
876 	}
877 
878 	if (processor->processor_self == IP_NULL) {
879 		ipc_processor_init(processor);
880 	}
881 
882 	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
883 	result = cpu_start(processor->cpu_id);
884 	if (result != KERN_SUCCESS) {
885 		s = splsched();
886 		pset_lock(pset);
887 		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
888 		pset_unlock(pset);
889 		splx(s);
890 		ml_cpu_end_state_transition(processor->cpu_id);
891 
892 		return result;
893 	}
894 	if (scheduler_disable) {
895 		assert(processor->processor_primary != processor);
896 		sched_processor_enable(processor, FALSE);
897 	}
898 
899 	ml_cpu_end_state_transition(processor->cpu_id);
900 	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
901 
902 #if CONFIG_KCOV
903 	kcov_start_cpu(processor->cpu_id);
904 #endif
905 
906 	return KERN_SUCCESS;
907 }
908 
909 
910 kern_return_t
processor_exit(processor_t processor)911 processor_exit(
912 	processor_t     processor)
913 {
914 	if (processor == PROCESSOR_NULL) {
915 		return KERN_INVALID_ARGUMENT;
916 	}
917 
918 	return processor_shutdown(processor);
919 }
920 
921 
922 kern_return_t
processor_start_from_user(processor_t processor)923 processor_start_from_user(
924 	processor_t                     processor)
925 {
926 	kern_return_t ret;
927 
928 	if (processor == PROCESSOR_NULL) {
929 		return KERN_INVALID_ARGUMENT;
930 	}
931 
932 	if (!cpu_can_exit(processor->cpu_id)) {
933 		ret = sched_processor_enable(processor, TRUE);
934 	} else {
935 		ret = processor_start(processor);
936 	}
937 
938 	return ret;
939 }
940 
941 kern_return_t
processor_exit_from_user(processor_t processor)942 processor_exit_from_user(
943 	processor_t     processor)
944 {
945 	kern_return_t ret;
946 
947 	if (processor == PROCESSOR_NULL) {
948 		return KERN_INVALID_ARGUMENT;
949 	}
950 
951 	if (!cpu_can_exit(processor->cpu_id)) {
952 		ret = sched_processor_enable(processor, FALSE);
953 	} else {
954 		ret = processor_shutdown(processor);
955 	}
956 
957 	return ret;
958 }
959 
960 kern_return_t
enable_smt_processors(bool enable)961 enable_smt_processors(bool enable)
962 {
963 	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
964 		/* Not an SMT system */
965 		return KERN_INVALID_ARGUMENT;
966 	}
967 
968 	int ncpus = machine_info.logical_cpu_max;
969 
970 	for (int i = 1; i < ncpus; i++) {
971 		processor_t processor = processor_array[i];
972 
973 		if (processor->processor_primary != processor) {
974 			if (enable) {
975 				processor_start_from_user(processor);
976 			} else { /* Disable */
977 				processor_exit_from_user(processor);
978 			}
979 		}
980 	}
981 
982 #define BSD_HOST 1
983 	host_basic_info_data_t hinfo;
984 	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
985 	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
986 	if (kret != KERN_SUCCESS) {
987 		return kret;
988 	}
989 
990 	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
991 		return KERN_FAILURE;
992 	}
993 
994 	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
995 		return KERN_FAILURE;
996 	}
997 
998 	return KERN_SUCCESS;
999 }
1000 
1001 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1002 processor_control(
1003 	processor_t             processor,
1004 	processor_info_t        info,
1005 	mach_msg_type_number_t  count)
1006 {
1007 	if (processor == PROCESSOR_NULL) {
1008 		return KERN_INVALID_ARGUMENT;
1009 	}
1010 
1011 	return cpu_control(processor->cpu_id, info, count);
1012 }
1013 
1014 kern_return_t
processor_set_create(__unused host_t host,__unused processor_set_t * new_set,__unused processor_set_t * new_name)1015 processor_set_create(
1016 	__unused host_t         host,
1017 	__unused processor_set_t        *new_set,
1018 	__unused processor_set_t        *new_name)
1019 {
1020 	return KERN_FAILURE;
1021 }
1022 
1023 kern_return_t
processor_set_destroy(__unused processor_set_t pset)1024 processor_set_destroy(
1025 	__unused processor_set_t        pset)
1026 {
1027 	return KERN_FAILURE;
1028 }
1029 
1030 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1031 processor_get_assignment(
1032 	processor_t     processor,
1033 	processor_set_t *pset)
1034 {
1035 	int state;
1036 
1037 	if (processor == PROCESSOR_NULL) {
1038 		return KERN_INVALID_ARGUMENT;
1039 	}
1040 
1041 	state = processor->state;
1042 	if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) {
1043 		return KERN_FAILURE;
1044 	}
1045 
1046 	*pset = &pset0;
1047 
1048 	return KERN_SUCCESS;
1049 }
1050 
1051 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1052 processor_set_info(
1053 	processor_set_t         pset,
1054 	int                     flavor,
1055 	host_t                  *host,
1056 	processor_set_info_t    info,
1057 	mach_msg_type_number_t  *count)
1058 {
1059 	if (pset == PROCESSOR_SET_NULL) {
1060 		return KERN_INVALID_ARGUMENT;
1061 	}
1062 
1063 	if (flavor == PROCESSOR_SET_BASIC_INFO) {
1064 		processor_set_basic_info_t      basic_info;
1065 
1066 		if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1067 			return KERN_FAILURE;
1068 		}
1069 
1070 		basic_info = (processor_set_basic_info_t) info;
1071 #if defined(__x86_64__)
1072 		basic_info->processor_count = processor_avail_count_user;
1073 #else
1074 		basic_info->processor_count = processor_avail_count;
1075 #endif
1076 		basic_info->default_policy = POLICY_TIMESHARE;
1077 
1078 		*count = PROCESSOR_SET_BASIC_INFO_COUNT;
1079 		*host = &realhost;
1080 		return KERN_SUCCESS;
1081 	} else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1082 		policy_timeshare_base_t ts_base;
1083 
1084 		if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1085 			return KERN_FAILURE;
1086 		}
1087 
1088 		ts_base = (policy_timeshare_base_t) info;
1089 		ts_base->base_priority = BASEPRI_DEFAULT;
1090 
1091 		*count = POLICY_TIMESHARE_BASE_COUNT;
1092 		*host = &realhost;
1093 		return KERN_SUCCESS;
1094 	} else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1095 		policy_fifo_base_t              fifo_base;
1096 
1097 		if (*count < POLICY_FIFO_BASE_COUNT) {
1098 			return KERN_FAILURE;
1099 		}
1100 
1101 		fifo_base = (policy_fifo_base_t) info;
1102 		fifo_base->base_priority = BASEPRI_DEFAULT;
1103 
1104 		*count = POLICY_FIFO_BASE_COUNT;
1105 		*host = &realhost;
1106 		return KERN_SUCCESS;
1107 	} else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1108 		policy_rr_base_t                rr_base;
1109 
1110 		if (*count < POLICY_RR_BASE_COUNT) {
1111 			return KERN_FAILURE;
1112 		}
1113 
1114 		rr_base = (policy_rr_base_t) info;
1115 		rr_base->base_priority = BASEPRI_DEFAULT;
1116 		rr_base->quantum = 1;
1117 
1118 		*count = POLICY_RR_BASE_COUNT;
1119 		*host = &realhost;
1120 		return KERN_SUCCESS;
1121 	} else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1122 		policy_timeshare_limit_t        ts_limit;
1123 
1124 		if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1125 			return KERN_FAILURE;
1126 		}
1127 
1128 		ts_limit = (policy_timeshare_limit_t) info;
1129 		ts_limit->max_priority = MAXPRI_KERNEL;
1130 
1131 		*count = POLICY_TIMESHARE_LIMIT_COUNT;
1132 		*host = &realhost;
1133 		return KERN_SUCCESS;
1134 	} else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1135 		policy_fifo_limit_t             fifo_limit;
1136 
1137 		if (*count < POLICY_FIFO_LIMIT_COUNT) {
1138 			return KERN_FAILURE;
1139 		}
1140 
1141 		fifo_limit = (policy_fifo_limit_t) info;
1142 		fifo_limit->max_priority = MAXPRI_KERNEL;
1143 
1144 		*count = POLICY_FIFO_LIMIT_COUNT;
1145 		*host = &realhost;
1146 		return KERN_SUCCESS;
1147 	} else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1148 		policy_rr_limit_t               rr_limit;
1149 
1150 		if (*count < POLICY_RR_LIMIT_COUNT) {
1151 			return KERN_FAILURE;
1152 		}
1153 
1154 		rr_limit = (policy_rr_limit_t) info;
1155 		rr_limit->max_priority = MAXPRI_KERNEL;
1156 
1157 		*count = POLICY_RR_LIMIT_COUNT;
1158 		*host = &realhost;
1159 		return KERN_SUCCESS;
1160 	} else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1161 		int                             *enabled;
1162 
1163 		if (*count < (sizeof(*enabled) / sizeof(int))) {
1164 			return KERN_FAILURE;
1165 		}
1166 
1167 		enabled = (int *) info;
1168 		*enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1169 
1170 		*count = sizeof(*enabled) / sizeof(int);
1171 		*host = &realhost;
1172 		return KERN_SUCCESS;
1173 	}
1174 
1175 
1176 	*host = HOST_NULL;
1177 	return KERN_INVALID_ARGUMENT;
1178 }
1179 
1180 /*
1181  *	processor_set_statistics
1182  *
1183  *	Returns scheduling statistics for a processor set.
1184  */
1185 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1186 processor_set_statistics(
1187 	processor_set_t         pset,
1188 	int                     flavor,
1189 	processor_set_info_t    info,
1190 	mach_msg_type_number_t  *count)
1191 {
1192 	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1193 		return KERN_INVALID_PROCESSOR_SET;
1194 	}
1195 
1196 	if (flavor == PROCESSOR_SET_LOAD_INFO) {
1197 		processor_set_load_info_t     load_info;
1198 
1199 		if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1200 			return KERN_FAILURE;
1201 		}
1202 
1203 		load_info = (processor_set_load_info_t) info;
1204 
1205 		load_info->mach_factor = sched_mach_factor;
1206 		load_info->load_average = sched_load_average;
1207 
1208 		load_info->task_count = tasks_count;
1209 		load_info->thread_count = threads_count;
1210 
1211 		*count = PROCESSOR_SET_LOAD_INFO_COUNT;
1212 		return KERN_SUCCESS;
1213 	}
1214 
1215 	return KERN_INVALID_ARGUMENT;
1216 }
1217 
1218 /*
1219  *	processor_set_max_priority:
1220  *
1221  *	Specify max priority permitted on processor set.  This affects
1222  *	newly created and assigned threads.  Optionally change existing
1223  *      ones.
1224  */
1225 kern_return_t
processor_set_max_priority(__unused processor_set_t pset,__unused int max_priority,__unused boolean_t change_threads)1226 processor_set_max_priority(
1227 	__unused processor_set_t        pset,
1228 	__unused int                    max_priority,
1229 	__unused boolean_t              change_threads)
1230 {
1231 	return KERN_INVALID_ARGUMENT;
1232 }
1233 
1234 /*
1235  *	processor_set_policy_enable:
1236  *
1237  *	Allow indicated policy on processor set.
1238  */
1239 
1240 kern_return_t
processor_set_policy_enable(__unused processor_set_t pset,__unused int policy)1241 processor_set_policy_enable(
1242 	__unused processor_set_t        pset,
1243 	__unused int                    policy)
1244 {
1245 	return KERN_INVALID_ARGUMENT;
1246 }
1247 
1248 /*
1249  *	processor_set_policy_disable:
1250  *
1251  *	Forbid indicated policy on processor set.  Time sharing cannot
1252  *	be forbidden.
1253  */
1254 kern_return_t
processor_set_policy_disable(__unused processor_set_t pset,__unused int policy,__unused boolean_t change_threads)1255 processor_set_policy_disable(
1256 	__unused processor_set_t        pset,
1257 	__unused int                    policy,
1258 	__unused boolean_t              change_threads)
1259 {
1260 	return KERN_INVALID_ARGUMENT;
1261 }
1262 
1263 /*
1264  *	processor_set_things:
1265  *
1266  *	Common internals for processor_set_{threads,tasks}
1267  */
1268 static kern_return_t
processor_set_things(processor_set_t pset,void ** thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1269 processor_set_things(
1270 	processor_set_t pset,
1271 	void **thing_list,
1272 	mach_msg_type_number_t *countp,
1273 	int type,
1274 	mach_task_flavor_t flavor)
1275 {
1276 	unsigned int i;
1277 	task_t task;
1278 	thread_t thread;
1279 
1280 	task_t *task_list;
1281 	vm_size_t actual_tasks, task_count_cur, task_count_needed;
1282 
1283 	thread_t *thread_list;
1284 	vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1285 
1286 	void *addr, *newaddr;
1287 	vm_size_t count, count_needed;
1288 
1289 	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1290 		return KERN_INVALID_ARGUMENT;
1291 	}
1292 
1293 	task_count_cur = 0;
1294 	task_count_needed = 0;
1295 	task_list = NULL;
1296 	actual_tasks = 0;
1297 
1298 	thread_count_cur = 0;
1299 	thread_count_needed = 0;
1300 	thread_list = NULL;
1301 	actual_threads = 0;
1302 
1303 	for (;;) {
1304 		lck_mtx_lock(&tasks_threads_lock);
1305 
1306 		/* do we have the memory we need? */
1307 		if (type == PSET_THING_THREAD) {
1308 			thread_count_needed = threads_count;
1309 		}
1310 #if !CONFIG_MACF
1311 		else
1312 #endif
1313 		task_count_needed = tasks_count;
1314 
1315 		if (task_count_needed <= task_count_cur &&
1316 		    thread_count_needed <= thread_count_cur) {
1317 			break;
1318 		}
1319 
1320 		/* unlock and allocate more memory */
1321 		lck_mtx_unlock(&tasks_threads_lock);
1322 
1323 		/* grow task array */
1324 		if (task_count_needed > task_count_cur) {
1325 			kfree_type(task_t, task_count_cur, task_list);
1326 			assert(task_count_needed > 0);
1327 			task_count_cur = task_count_needed;
1328 
1329 			task_list = kalloc_type(task_t, task_count_cur, Z_WAITOK | Z_ZERO);
1330 			if (task_list == NULL) {
1331 				kfree_type(thread_t, thread_count_cur, thread_list);
1332 				return KERN_RESOURCE_SHORTAGE;
1333 			}
1334 		}
1335 
1336 		/* grow thread array */
1337 		if (thread_count_needed > thread_count_cur) {
1338 			kfree_type(thread_t, thread_count_cur, thread_list);
1339 
1340 			assert(thread_count_needed > 0);
1341 			thread_count_cur = thread_count_needed;
1342 
1343 			thread_list = kalloc_type(thread_t, thread_count_cur, Z_WAITOK | Z_ZERO);
1344 			if (thread_list == NULL) {
1345 				kfree_type(task_t, task_count_cur, task_list);
1346 				return KERN_RESOURCE_SHORTAGE;
1347 			}
1348 		}
1349 	}
1350 
1351 	/* OK, have memory and the list locked */
1352 
1353 	/* If we need it, get the thread list */
1354 	if (type == PSET_THING_THREAD) {
1355 		queue_iterate(&threads, thread, thread_t, threads) {
1356 			task = get_threadtask(thread);
1357 #if defined(SECURE_KERNEL)
1358 			if (task == kernel_task) {
1359 				/* skip threads belonging to kernel_task */
1360 				continue;
1361 			}
1362 #endif
1363 			if (task_is_exec_copy_internal(task)) {
1364 				/* skip threads belonging to tasks in the middle of exec */
1365 				continue;
1366 			}
1367 
1368 			thread_reference(thread);
1369 			thread_list[actual_threads++] = thread;
1370 		}
1371 	}
1372 #if !CONFIG_MACF
1373 	else
1374 #endif
1375 	{
1376 		/* get a list of the tasks */
1377 		queue_iterate(&tasks, task, task_t, tasks) {
1378 #if defined(SECURE_KERNEL)
1379 			if (task == kernel_task) {
1380 				/* skip kernel_task */
1381 				continue;
1382 			}
1383 #endif
1384 			if (task_is_exec_copy_internal(task)) {
1385 				/* skip new tasks created in the middle of exec */
1386 				continue;
1387 			}
1388 
1389 			task_reference(task);
1390 			task_list[actual_tasks++] = task;
1391 		}
1392 	}
1393 
1394 	lck_mtx_unlock(&tasks_threads_lock);
1395 
1396 #if CONFIG_MACF
1397 	unsigned int j, used;
1398 
1399 	/* for each task, make sure we are allowed to examine it */
1400 	for (i = used = 0; i < actual_tasks; i++) {
1401 		if (mac_task_check_expose_task(task_list[i], flavor)) {
1402 			task_deallocate(task_list[i]);
1403 			continue;
1404 		}
1405 		task_list[used++] = task_list[i];
1406 	}
1407 	actual_tasks = used;
1408 	task_count_needed = actual_tasks;
1409 
1410 	if (type == PSET_THING_THREAD) {
1411 		/* for each thread (if any), make sure it's task is in the allowed list */
1412 		for (i = used = 0; i < actual_threads; i++) {
1413 			boolean_t found_task = FALSE;
1414 
1415 			task = get_threadtask(thread_list[i]);
1416 			for (j = 0; j < actual_tasks; j++) {
1417 				if (task_list[j] == task) {
1418 					found_task = TRUE;
1419 					break;
1420 				}
1421 			}
1422 			if (found_task) {
1423 				thread_list[used++] = thread_list[i];
1424 			} else {
1425 				thread_deallocate(thread_list[i]);
1426 			}
1427 		}
1428 		actual_threads = used;
1429 		thread_count_needed = actual_threads;
1430 
1431 		/* done with the task list */
1432 		for (i = 0; i < actual_tasks; i++) {
1433 			task_deallocate(task_list[i]);
1434 		}
1435 		kfree_type(task_t, task_count_cur, task_list);
1436 		task_count_cur = 0;
1437 		actual_tasks = 0;
1438 		task_list = NULL;
1439 	}
1440 #endif
1441 
1442 	if (type == PSET_THING_THREAD) {
1443 		if (actual_threads == 0) {
1444 			/* no threads available to return */
1445 			assert(task_count_cur == 0);
1446 			kfree_type(thread_t, thread_count_cur, thread_list);
1447 			*thing_list = NULL;
1448 			*countp = 0;
1449 			return KERN_SUCCESS;
1450 		}
1451 		count_needed = actual_threads;
1452 		count = thread_count_cur;
1453 		addr = thread_list;
1454 	} else {
1455 		if (actual_tasks == 0) {
1456 			/* no tasks available to return */
1457 			assert(thread_count_cur == 0);
1458 			kfree_type(task_t, task_count_cur, task_list);
1459 			*thing_list = NULL;
1460 			*countp = 0;
1461 			return KERN_SUCCESS;
1462 		}
1463 		count_needed = actual_tasks;
1464 		count = task_count_cur;
1465 		addr = task_list;
1466 	}
1467 
1468 	/* if we allocated too much, must copy */
1469 	if (count_needed < count) {
1470 		newaddr = kalloc_type(void *, count_needed, Z_WAITOK | Z_ZERO);
1471 		if (newaddr == 0) {
1472 			for (i = 0; i < actual_tasks; i++) {
1473 				if (type == PSET_THING_THREAD) {
1474 					thread_deallocate(thread_list[i]);
1475 				} else {
1476 					task_deallocate(task_list[i]);
1477 				}
1478 			}
1479 			kfree_type(void *, count, addr);
1480 			return KERN_RESOURCE_SHORTAGE;
1481 		}
1482 
1483 		bcopy(addr, newaddr, count_needed * sizeof(void *));
1484 		kfree_type(void *, count, addr);
1485 
1486 		addr = newaddr;
1487 		count = count_needed;
1488 	}
1489 
1490 	*thing_list = (void **)addr;
1491 	*countp = (mach_msg_type_number_t)count;
1492 
1493 	return KERN_SUCCESS;
1494 }
1495 
1496 /*
1497  *	processor_set_tasks:
1498  *
1499  *	List all tasks in the processor set.
1500  */
1501 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1502 processor_set_tasks_internal(
1503 	processor_set_t         pset,
1504 	task_array_t            *task_list,
1505 	mach_msg_type_number_t  *count,
1506 	mach_task_flavor_t      flavor)
1507 {
1508 	kern_return_t ret;
1509 	mach_msg_type_number_t i;
1510 
1511 	ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
1512 	if (ret != KERN_SUCCESS) {
1513 		return ret;
1514 	}
1515 
1516 	/* do the conversion that Mig should handle */
1517 	switch (flavor) {
1518 	case TASK_FLAVOR_CONTROL:
1519 		for (i = 0; i < *count; i++) {
1520 			if ((*task_list)[i] == current_task()) {
1521 				/* if current_task(), return pinned port */
1522 				(*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
1523 			} else {
1524 				(*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
1525 			}
1526 		}
1527 		break;
1528 	case TASK_FLAVOR_READ:
1529 		for (i = 0; i < *count; i++) {
1530 			(*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
1531 		}
1532 		break;
1533 	case TASK_FLAVOR_INSPECT:
1534 		for (i = 0; i < *count; i++) {
1535 			(*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
1536 		}
1537 		break;
1538 	case TASK_FLAVOR_NAME:
1539 		for (i = 0; i < *count; i++) {
1540 			(*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
1541 		}
1542 		break;
1543 	default:
1544 		return KERN_INVALID_ARGUMENT;
1545 	}
1546 
1547 	return KERN_SUCCESS;
1548 }
1549 
1550 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1551 processor_set_tasks(
1552 	processor_set_t         pset,
1553 	task_array_t            *task_list,
1554 	mach_msg_type_number_t  *count)
1555 {
1556 	return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1557 }
1558 
1559 /*
1560  *	processor_set_tasks_with_flavor:
1561  *
1562  *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
1563  */
1564 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1565 processor_set_tasks_with_flavor(
1566 	processor_set_t         pset,
1567 	mach_task_flavor_t      flavor,
1568 	task_array_t            *task_list,
1569 	mach_msg_type_number_t  *count)
1570 {
1571 	switch (flavor) {
1572 	case TASK_FLAVOR_CONTROL:
1573 	case TASK_FLAVOR_READ:
1574 	case TASK_FLAVOR_INSPECT:
1575 	case TASK_FLAVOR_NAME:
1576 		return processor_set_tasks_internal(pset, task_list, count, flavor);
1577 	default:
1578 		return KERN_INVALID_ARGUMENT;
1579 	}
1580 }
1581 
1582 /*
1583  *	processor_set_threads:
1584  *
1585  *	List all threads in the processor set.
1586  */
1587 #if defined(SECURE_KERNEL)
1588 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1589 processor_set_threads(
1590 	__unused processor_set_t                pset,
1591 	__unused thread_array_t         *thread_list,
1592 	__unused mach_msg_type_number_t *count)
1593 {
1594 	return KERN_FAILURE;
1595 }
1596 #elif !defined(XNU_TARGET_OS_OSX)
1597 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1598 processor_set_threads(
1599 	__unused processor_set_t                pset,
1600 	__unused thread_array_t         *thread_list,
1601 	__unused mach_msg_type_number_t *count)
1602 {
1603 	return KERN_NOT_SUPPORTED;
1604 }
1605 #else
1606 kern_return_t
processor_set_threads(processor_set_t pset,thread_array_t * thread_list,mach_msg_type_number_t * count)1607 processor_set_threads(
1608 	processor_set_t         pset,
1609 	thread_array_t          *thread_list,
1610 	mach_msg_type_number_t  *count)
1611 {
1612 	kern_return_t ret;
1613 	mach_msg_type_number_t i;
1614 
1615 	ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1616 	if (ret != KERN_SUCCESS) {
1617 		return ret;
1618 	}
1619 
1620 	/* do the conversion that Mig should handle */
1621 	for (i = 0; i < *count; i++) {
1622 		(*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
1623 	}
1624 	return KERN_SUCCESS;
1625 }
1626 #endif
1627 
1628 /*
1629  *	processor_set_policy_control
1630  *
1631  *	Controls the scheduling attributes governing the processor set.
1632  *	Allows control of enabled policies, and per-policy base and limit
1633  *	priorities.
1634  */
1635 kern_return_t
processor_set_policy_control(__unused processor_set_t pset,__unused int flavor,__unused processor_set_info_t policy_info,__unused mach_msg_type_number_t count,__unused boolean_t change)1636 processor_set_policy_control(
1637 	__unused processor_set_t                pset,
1638 	__unused int                            flavor,
1639 	__unused processor_set_info_t   policy_info,
1640 	__unused mach_msg_type_number_t count,
1641 	__unused boolean_t                      change)
1642 {
1643 	return KERN_INVALID_ARGUMENT;
1644 }
1645 
1646 #undef pset_deallocate
1647 void pset_deallocate(processor_set_t pset);
1648 void
pset_deallocate(__unused processor_set_t pset)1649 pset_deallocate(
1650 	__unused processor_set_t        pset)
1651 {
1652 	return;
1653 }
1654 
1655 #undef pset_reference
1656 void pset_reference(processor_set_t pset);
1657 void
pset_reference(__unused processor_set_t pset)1658 pset_reference(
1659 	__unused processor_set_t        pset)
1660 {
1661 	return;
1662 }
1663 
#if CONFIG_THREAD_GROUPS

/*
 *	thread_group_pset_recommendation:
 *
 *	Maps a thread group's cluster recommendation to a pset cluster type.
 *	On __AMP__ builds CLUSTER_TYPE_E maps to PSET_AMP_E and
 *	CLUSTER_TYPE_P to PSET_AMP_P; any other value (including
 *	CLUSTER_TYPE_SMP) yields PSET_AMP_E when
 *	thread_group_smp_restricted(tg) is true and PSET_AMP_P otherwise.
 *	On other builds both arguments are unused and PSET_SMP is returned.
 */
pset_cluster_type_t
thread_group_pset_recommendation(__unused struct thread_group *tg, __unused cluster_type_t recommendation)
{
#if __AMP__
	if (recommendation == CLUSTER_TYPE_E) {
		return PSET_AMP_E;
	}

	if (recommendation == CLUSTER_TYPE_P) {
		return PSET_AMP_P;
	}

	/*
	 * SMP (or unrecognised) recommendation: check if the thread group
	 * has special flags which restrict it to the E cluster.
	 */
	return thread_group_smp_restricted(tg) ? PSET_AMP_E : PSET_AMP_P;
#else /* __AMP__ */
	return PSET_SMP;
#endif /* __AMP__ */
}

#endif
1693 
1694 pset_cluster_type_t
recommended_pset_type(thread_t thread)1695 recommended_pset_type(thread_t thread)
1696 {
1697 #if CONFIG_THREAD_GROUPS && __AMP__
1698 	if (thread == THREAD_NULL) {
1699 		return PSET_AMP_E;
1700 	}
1701 
1702 #if DEVELOPMENT || DEBUG
1703 	extern bool system_ecore_only;
1704 	extern int enable_task_set_cluster_type;
1705 	task_t task = get_threadtask(thread);
1706 	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
1707 		processor_set_t pset_hint = task->pset_hint;
1708 		if (pset_hint) {
1709 			return pset_hint->pset_cluster_type;
1710 		}
1711 	}
1712 
1713 	if (system_ecore_only) {
1714 		return PSET_AMP_E;
1715 	}
1716 #endif
1717 
1718 	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
1719 		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
1720 	}
1721 
1722 	if (thread->base_pri <= MAXPRI_THROTTLE) {
1723 		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1724 			return PSET_AMP_E;
1725 		}
1726 	} else if (thread->base_pri <= BASEPRI_UTILITY) {
1727 		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1728 			return PSET_AMP_E;
1729 		}
1730 	}
1731 
1732 	struct thread_group *tg = thread_group_get(thread);
1733 	cluster_type_t recommendation = thread_group_recommendation(tg);
1734 	switch (recommendation) {
1735 	case CLUSTER_TYPE_SMP:
1736 	default:
1737 		if (get_threadtask(thread) == kernel_task) {
1738 			return PSET_AMP_E;
1739 		}
1740 		return PSET_AMP_P;
1741 	case CLUSTER_TYPE_E:
1742 		return PSET_AMP_E;
1743 	case CLUSTER_TYPE_P:
1744 		return PSET_AMP_P;
1745 	}
1746 #else
1747 	(void)thread;
1748 	return PSET_SMP;
1749 #endif
1750 }
1751 
#if CONFIG_THREAD_GROUPS && __AMP__

/*
 *	sched_perfcontrol_inherit_recommendation_from_tg:
 *
 *	Sets the perfctl policy for the utility or background class:
 *	SCHED_PERFCTL_POLICY_FOLLOW_GROUP when inherit is true,
 *	SCHED_PERFCTL_POLICY_RESTRICT_E otherwise.  The change is traced via
 *	KDBG before being stored.  Any class other than
 *	PERFCONTROL_CLASS_UTILITY or PERFCONTROL_CLASS_BACKGROUND panics.
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy;

	if (inherit) {
		sched_policy = SCHED_PERFCTL_POLICY_FOLLOW_GROUP;
	} else {
		sched_policy = SCHED_PERFCTL_POLICY_RESTRICT_E;
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	if (perfctl_class == PERFCONTROL_CLASS_UTILITY) {
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
	} else if (perfctl_class == PERFCONTROL_CLASS_BACKGROUND) {
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
	} else {
		panic("perfctl_class invalid");
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
	/* intentionally empty */
}

#endif /* defined(__arm64__) */
1783