1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <kern/processor.h>
64
65 #if !SCHED_TEST_HARNESS
66
67 #include <mach/boolean.h>
68 #include <mach/policy.h>
69 #include <mach/processor.h>
70 #include <mach/processor_info.h>
71 #include <mach/vm_param.h>
72 #include <kern/bits.h>
73 #include <kern/cpu_number.h>
74 #include <kern/host.h>
75 #include <kern/ipc_host.h>
76 #include <kern/ipc_tt.h>
77 #include <kern/kalloc.h>
78 #include <kern/kern_types.h>
79 #include <kern/machine.h>
80 #include <kern/misc_protos.h>
81 #include <kern/sched.h>
82 #include <kern/smr.h>
83 #include <kern/task.h>
84 #include <kern/thread.h>
85 #include <kern/timer.h>
86 #if KPERF
87 #include <kperf/kperf.h>
88 #endif /* KPERF */
89 #include <ipc/ipc_port.h>
90 #include <machine/commpage.h>
91
92 #include <security/mac_mach_internal.h>
93
94 #if defined(CONFIG_XNUPOST)
95
96 #include <tests/xnupost.h>
97
98 #endif /* CONFIG_XNUPOST */
99
100 /*
101 * Exported interface
102 */
103 #include <mach/mach_host_server.h>
104 #include <mach/processor_set_server.h>
105 #include <san/kcov.h>
106
107 #endif /* !SCHED_TEST_HARNESS */
108
109
110 #if __AMP__
111
112 /*
113 * For AMP platforms, all psets of the same type are part of
114 * the same pset_node. This allows for easier CPU selection logic.
115 */
116 struct pset_node pset_nodes[MAX_AMP_CLUSTER_TYPES];
117 static int next_pset_node_index = 1;
118 static pset_node_t pset_nodes_by_cluster_type[MAX_AMP_CLUSTER_TYPES];
119
120 static void
pset_node_set_for_pset_cluster_type(pset_node_t node,pset_cluster_type_t pset_cluster_type)121 pset_node_set_for_pset_cluster_type(pset_node_t node, pset_cluster_type_t pset_cluster_type)
122 {
123 assert3p(pset_nodes_by_cluster_type[pset_cluster_type - 1], ==, PSET_NODE_NULL);
124 pset_nodes_by_cluster_type[pset_cluster_type - 1] = node;
125 }
126
127 pset_node_t
pset_node_for_pset_cluster_type(pset_cluster_type_t pset_cluster_type)128 pset_node_for_pset_cluster_type(pset_cluster_type_t pset_cluster_type)
129 {
130 assert3u(pset_cluster_type, !=, PSET_SMP);
131 return os_atomic_load(&pset_nodes_by_cluster_type[pset_cluster_type - 1], acquire);
132 }
133
134 #else /* !__AMP__ */
135
136 /* The boot node */
137 struct pset_node pset_node0;
138
139 #endif /* !__AMP__ */
140
141 /* The boot pset */
142 SECURITY_READ_ONLY_LATE(processor_set_t) sched_boot_pset = PROCESSOR_SET_NULL;
143
144 #if !SCHED_TEST_HARNESS
145
146 LCK_GRP_DECLARE(pset_lck_grp, "pset");
147
148 queue_head_t tasks;
149 queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */
150 queue_head_t corpse_tasks;
151 int tasks_count;
152 int terminated_tasks_count;
153 queue_head_t threads;
154 queue_head_t terminated_threads;
155 int threads_count;
156 int terminated_threads_count;
157 LCK_GRP_DECLARE(task_lck_grp, "task");
158 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
159 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
160 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
161
162 #endif /* !SCHED_TEST_HARNESS */
163
164 processor_t processor_list;
165 unsigned int processor_count;
166 static processor_t processor_list_tail;
167 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
168 SIMPLE_LOCK_DECLARE(processor_start_state_lock, 0);
169
170 uint32_t processor_avail_count;
171 uint32_t processor_avail_count_user;
172 #if CONFIG_SCHED_SMT
173 uint32_t primary_processor_avail_count_user;
174 #endif /* CONFIG_SCHED_SMT */
175
176 #if XNU_SUPPORT_BOOTCPU_SHUTDOWN
177 TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", true);
178 #else
179 TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", false);
180 #endif
181
182 #if __x86_64__ || XNU_ENABLE_PROCESSOR_EXIT
183 TUNABLE(bool, enable_processor_exit, "processor_exit", true);
184 #else
185 TUNABLE(bool, enable_processor_exit, "processor_exit", false);
186 #endif
187
188 SECURITY_READ_ONLY_LATE(int) master_cpu = 0;
189
190 processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
191 processor_set_t pset_array[MAX_PSETS] = { 0 };
192 struct processor_set pset_array_actual[MAX_PSETS] = { 0 };
193
/*
 * Validated wrapper around pset_for_id(): asserts that `id` is within
 * the configured pset range and that the pset slot has been initialized
 * (pset_init() stores into pset_array last) before returning it.
 */
processor_set_t
pset_for_id_checked(pset_id_t id)
{
#if __AMP__
	/* sched_num_psets only exists on AMP platforms, but it should be valid
	 * before accessing pset_array entries. */
	assert3u(sched_num_psets, >, 0);
	assert3u(sched_num_psets, <=, MAX_PSETS);
	assert3u(id, <, sched_num_psets);
#else /* !__AMP__ */
	assert3u(id, <, MAX_PSETS);
#endif /* __AMP__ */
	assert(pset_array[id] != PROCESSOR_SET_NULL); /* check if pset is initialized */
	return pset_for_id(id);
}
209
210 #if !SCHED_TEST_HARNESS
211
212 struct processor PERCPU_DATA(processor);
213 static timer_call_func_t running_timer_funcs[] = {
214 [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
215 [RUNNING_TIMER_PREEMPT] = thread_preempt_expire,
216 [RUNNING_TIMER_KPERF] = kperf_timer_expire,
217 [RUNNING_TIMER_PERFCONTROL] = perfcontrol_timer_expire,
218 };
219 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
220 == RUNNING_TIMER_MAX, "missing running timer function");
221
222 #if defined(CONFIG_XNUPOST)
223 kern_return_t ipi_test(void);
224 extern void arm64_ipi_test(void);
225
/*
 * XNUPOST test: exercise the IPI machinery by binding the calling thread
 * to each registered processor in turn and firing arm64_ipi_test() while
 * running there.  On non-arm64 platforms this is a no-op pass.
 * Always returns KERN_SUCCESS; failures are reported via the T_ macros.
 */
kern_return_t
ipi_test()
{
#if __arm64__
	processor_t p;

	/* Walk the global processor list, running the test on each CPU. */
	for (p = processor_list; p != NULL; p = p->processor_list) {
		/* thread_bind takes effect at the next block/context switch */
		thread_bind(p);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", p->cpu_id);
		arm64_ipi_test();
	}

	/* unbind thread from specific cpu */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
251 #endif /* defined(CONFIG_XNUPOST) */
252
253 int sched_enable_smt = 1;
254
255 #endif /* !SCHED_TEST_HARNESS */
256
257 cpumap_t processor_offline_state_map[PROCESSOR_OFFLINE_MAX];
258
259 void
processor_update_offline_state_locked(processor_t processor,processor_offline_state_t new_state)260 processor_update_offline_state_locked(processor_t processor,
261 processor_offline_state_t new_state)
262 {
263 simple_lock_assert(&sched_available_cores_lock, LCK_ASSERT_OWNED);
264
265 processor_offline_state_t old_state = processor->processor_offline_state;
266
267 uint cpuid = (uint)processor->cpu_id;
268
269 assert(old_state < PROCESSOR_OFFLINE_MAX);
270 assert(new_state < PROCESSOR_OFFLINE_MAX);
271
272 processor->processor_offline_state = new_state;
273
274 bit_clear(processor_offline_state_map[old_state], cpuid);
275 bit_set(processor_offline_state_map[new_state], cpuid);
276 }
277
/*
 * Locking wrapper for processor_update_offline_state_locked():
 * disables interrupts and takes the sched_available_cores_lock
 * around the state/cpumap update.
 */
void
processor_update_offline_state(processor_t processor,
    processor_offline_state_t new_state)
{
	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
	processor_update_offline_state_locked(processor, new_state);
	simple_unlock(&sched_available_cores_lock);
	splx(s);
}
288
/*
 * Early boot setup: initialize scheduler locks, the boot pset and its
 * node, the global task/thread bookkeeping queues, and finally the
 * master processor itself.  Runs before any other processor starts.
 */
void
processor_bootstrap(void)
{
	simple_lock_init(&sched_available_cores_lock, 0);
	simple_lock_init(&processor_start_state_lock, 0);

	/* Initialize boot pset and node */
#if __AMP__
	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	const ml_topology_info_t *topology_info = ml_get_topology_info();
	assert3u(topology_info->num_clusters, <=, MAX_PSETS);
	sched_num_psets = (uint8_t)topology_info->num_clusters;
	assert3u(sched_num_psets, >, 0);
	assert3u(sched_num_psets, <=, MAX_PSETS);
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset_cluster_type_t boot_cluster_type = cluster_type_to_pset_cluster_type(boot_cluster->cluster_type);
	assert3u(boot_cluster->cluster_id, <, sched_num_psets);
	/* The boot pset's slot is chosen by the boot cluster's ID. */
	sched_boot_pset = &pset_array_actual[boot_cluster->cluster_id]; /* makes sched_boot_pset work */
	sched_boot_pset->pset_id = boot_cluster->cluster_id;
	sched_boot_pset->pset_cluster_id = boot_cluster->cluster_id;
	pset_node0.pset_cluster_type = boot_cluster_type;
	sched_boot_pset->pset_cluster_type = boot_cluster_type;
	pset_node_set_for_pset_cluster_type(&pset_node0, boot_cluster_type);
#else /* !__AMP__ */
	sched_boot_pset = &pset_array_actual[0]; /* makes sched_boot_pset work */
	sched_boot_pset->pset_id = 0;
	sched_boot_pset->pset_cluster_id = 0;
	pset_node0.pset_cluster_type = PSET_SMP;
	sched_boot_pset->pset_cluster_type = PSET_SMP;
#endif /* !__AMP__ */

	pset_init(sched_boot_pset, &pset_node0);
#if !SCHED_TEST_HARNESS
	/* Global task/thread lists (see tasks_threads_lock above). */
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);
#endif /* !SCHED_TEST_HARNESS */

	processor_init(master_processor, master_cpu, sched_boot_pset);
}
334
335 /*
336 * Initialize the given processor for the cpu
337 * indicated by cpu_id, and assign to the
338 * specified processor set.
339 */
void
processor_init(
	processor_t processor,
	int cpu_id,
	processor_set_t pset)
{
	spl_t s;

	assert(cpu_id < MAX_SCHED_CPUS);
	processor->cpu_id = cpu_id;

	if (processor != master_processor) {
		/* Scheduler state for master_processor initialized in sched_init() */
		SCHED(processor_init)(processor);
		smr_cpu_init(processor);
	}

	/* Default field values; the CPU is offline until explicitly started. */
	processor->state = PROCESSOR_OFF_LINE;
	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
	processor->processor_set = pset;
	processor_state_update_idle(processor);
	processor->starting_pri = MINPRI;
	processor->quantum_end = UINT64_MAX;
	processor->deadline = UINT64_MAX;
	processor->first_timeslice = FALSE;
	processor->processor_online = false;
#if CONFIG_SCHED_SMT
	processor->processor_primary = processor; /* no SMT relationship known at this point */
	processor->processor_secondary = NULL;
	processor->is_SMT = false;
#endif /* CONFIG_SCHED_SMT */
	processor->processor_self = IP_NULL;
	processor->processor_list = NULL;
	processor->must_idle = false;
	processor->next_idle_short = false;
	processor->last_startup_reason = REASON_SYSTEM;
	processor->last_shutdown_reason = REASON_NONE;
	processor->shutdown_temporary = false;
	processor->processor_inshutdown = false;
	processor->processor_instartup = false;
	processor->last_derecommend_reason = REASON_NONE;
#if !SCHED_TEST_HARNESS
	/* Set up the per-CPU running timers (quantum, preempt, kperf, ...). */
	processor->running_timers_active = false;
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		timer_call_setup(&processor->running_timers[i],
		    running_timer_funcs[i], processor);
		running_timer_clear(processor, i);
	}
	recount_processor_init(processor);
#endif /* !SCHED_TEST_HARNESS */

#if CONFIG_SCHED_EDGE
	os_atomic_init(&processor->stir_the_pot_inbox_cpu, -1);
#endif /* CONFIG_SCHED_EDGE */

	/*
	 * Publish the CPU into its pset's bitmaps.  Lock order here is
	 * splsched -> sched_available_cores_lock -> pset lock.
	 */
	s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	pset_lock(pset);
	bit_set(pset->cpu_bitmask, cpu_id);
	bit_set(pset->recommended_bitmask, cpu_id);
	atomic_bit_set(&pset->node->pset_recommended_map, pset->pset_id, memory_order_relaxed);
#if CONFIG_SCHED_SMT
	bit_set(pset->primary_map, cpu_id);
#endif /* CONFIG_SCHED_SMT */
	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
	/* Maintain the pset's low/high CPU id bounds. */
	if (pset->cpu_set_count++ == 0) {
		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
	} else {
		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
	}

	processor->last_recommend_reason = REASON_SYSTEM;
	sched_processor_change_mode_locked(processor, PCM_RECOMMENDED, true);
	pset_unlock(pset);

	processor->processor_offline_state = PROCESSOR_OFFLINE_NOT_BOOTED;
	bit_set(processor_offline_state_map[processor->processor_offline_state], cpu_id);

	if (processor == master_processor) {
		/* The boot CPU is already coming up by the time it is initialized. */
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);

	/* Append to the global singly-linked processor list. */
	simple_lock(&processor_list_lock, LCK_GRP_NULL);
	if (processor_list == NULL) {
		processor_list = processor;
	} else {
		processor_list_tail->processor_list = processor;
	}
	processor_list_tail = processor;
	processor_count++;
	simple_unlock(&processor_list_lock);
	processor_array[cpu_id] = processor;
}
438
439 #if CONFIG_SCHED_SMT
440 bool system_is_SMT = false;
441
442 void
processor_set_primary(processor_t processor,processor_t primary)443 processor_set_primary(
444 processor_t processor,
445 processor_t primary)
446 {
447 assert(processor->processor_primary == primary || processor->processor_primary == processor);
448 /* Re-adjust primary point for this (possibly) secondary processor */
449 processor->processor_primary = primary;
450
451 assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
452 if (primary != processor) {
453 /* Link primary to secondary, assumes a 2-way SMT model
454 * We'll need to move to a queue if any future architecture
455 * requires otherwise.
456 */
457 assert(processor->processor_secondary == NULL);
458 primary->processor_secondary = processor;
459 /* Mark both processors as SMT siblings */
460 primary->is_SMT = TRUE;
461 processor->is_SMT = TRUE;
462
463 if (!system_is_SMT) {
464 system_is_SMT = true;
465 sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
466 }
467
468 processor_set_t pset = processor->processor_set;
469 spl_t s = splsched();
470 pset_lock(pset);
471 if (!pset->is_SMT) {
472 pset->is_SMT = true;
473 }
474 bit_clear(pset->primary_map, processor->cpu_id);
475 pset_unlock(pset);
476 splx(s);
477 }
478 }
479 #endif /* CONFIG_SCHED_SMT */
480
/*
 * Return the processor set this processor currently belongs to.
 */
processor_set_t
processor_pset(
	processor_t processor)
{
	return processor->processor_set;
}
487
488 cpumap_t
pset_available_cpumap(processor_set_t pset)489 pset_available_cpumap(processor_set_t pset)
490 {
491 return pset->cpu_available_map & pset->recommended_bitmask;
492 }
493
494 #if CONFIG_SCHED_EDGE
495
496 /* Returns the scheduling type for the pset */
497 cluster_type_t
pset_type_for_id(uint32_t cluster_id)498 pset_type_for_id(uint32_t cluster_id)
499 {
500 return pset_array[cluster_id]->pset_type;
501 }
502
503 /*
504 * Processor foreign threads
505 *
506 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
507 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
508 * if its of a different type than its preferred cluster type (E/P). The bitmap should
509 * be updated every time a new thread is assigned to run on a processor. Cluster shared
510 * resource intensive threads are also not counted as foreign threads since these
511 * threads should not be rebalanced when running on non-preferred clusters.
512 *
513 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
514 * for rebalancing.
515 */
516 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)517 processor_state_update_running_foreign(processor_t processor, thread_t thread)
518 {
519 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
520 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
521
522 boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
523 boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
524 if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
525 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
526 } else {
527 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
528 }
529 }
530
531 /*
532 * Cluster shared resource intensive threads
533 *
534 * With the Edge scheduler, each pset maintains a bitmap of processors running
535 * threads that are shared resource intensive. This per-thread property is set
536 * by the performance controller or explicitly via dispatch SPIs. The bitmap
537 * allows the Edge scheduler to calculate the cluster shared resource load on
538 * any given cluster and load balance intensive threads accordingly.
539 */
540 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)541 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
542 {
543 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
544 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
545 } else {
546 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
547 }
548 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
549 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
550 } else {
551 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
552 }
553 }
554
555 #endif /* CONFIG_SCHED_EDGE */
556
/*
 * Reset the processor's cached "current thread" state to idle defaults
 * (priority, SFI class, urgency, thread group, Edge-scheduler bitmaps)
 * and refresh the pset load average.
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = false;
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* TH_BUCKET_SCHED_MAX marks this CPU as running no tracked bucket. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
	sched_edge_stir_the_pot_clear_registry_entry();
#endif /* CONFIG_SCHED_EDGE */
	SCHED(update_pset_load_average)(processor->processor_set, 0);
}
581
/*
 * Refresh the processor's cached "current thread" state from the thread
 * going on-core.  The pset load average is only recomputed when the
 * caller already holds the pset lock (pset_lock_held).
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	sched_edge_stir_the_pot_update_registry_state(thread);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = thread_no_smt(thread);
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		SCHED(update_pset_load_average)(processor->processor_set, 0);
	}
}
612
/*
 * Return the root (boot) pset node.
 */
pset_node_t
pset_node_root(void)
{
	return &pset_node0;
}
618
619 #if __AMP__
620
621 /*
622 * Only need to dynamically initialize pset nodes when
623 * there are multiple cluster types.
624 */
static pset_node_t
pset_node_create(cluster_type_t cluster_type)
{
	assert3u(cluster_type, !=, CLUSTER_TYPE_SMP);
	pset_cluster_type_t pset_cluster_type = cluster_type_to_pset_cluster_type(cluster_type);
	assert3u(next_pset_node_index, <, MAX_AMP_CLUSTER_TYPES);
	pset_node_t node = &pset_nodes[next_pset_node_index++];
	node->psets = PROCESSOR_SET_NULL;
	node->pset_cluster_type = pset_cluster_type;
	/* Insert into node linked list */
	/*
	 * After the post-increment above, the new node sits at index
	 * (next_pset_node_index - 1), so (next_pset_node_index - 2) is the
	 * most recently created node; link the new node after it.
	 * NOTE(review): on the first call this indexes pset_nodes[0], which
	 * appears to be the boot node (next_pset_node_index starts at 1) --
	 * confirm against the pset_node0 definition for __AMP__.
	 */
	pset_nodes[next_pset_node_index - 2].node_list = node;
	pset_node_set_for_pset_cluster_type(node, pset_cluster_type);

	return node;
}
640
641 #endif /* __AMP__*/
642
/*
 * Create and initialize the pset for the given cluster.  On AMP
 * platforms the pset is attached to the node matching its cluster type,
 * creating that node on first use; otherwise everything hangs off
 * pset_node0.  Schedulers that do not support multiple psets always get
 * the master processor's pset back.
 */
processor_set_t
pset_create(
	cluster_type_t cluster_type,
	uint32_t pset_cluster_id,
	int pset_id)
{
	/* some schedulers do not support multiple psets */
	if (SCHED(multiple_psets_enabled) == FALSE) {
		return processor_pset(master_processor);
	}

	pset_node_t node;
	pset_cluster_type_t pset_cluster_type;
#if __AMP__
	pset_cluster_type = cluster_type_to_pset_cluster_type(cluster_type);
	node = pset_node_for_pset_cluster_type(pset_cluster_type);
	if (node == PSET_NODE_NULL) {
		/* First pset of this cluster type */
		node = pset_node_create(cluster_type);
	}
#else /* !__AMP__ */
	pset_cluster_type = PSET_SMP;
	node = &pset_node0;
	(void)cluster_type;
#endif /* !__AMP__ */

	/* Each pset_id claims one statically allocated slot, exactly once. */
	assert3u(pset_id, <, MAX_PSETS);
	assert3p(pset_array[pset_id], ==, PROCESSOR_SET_NULL);
	processor_set_t pset = &pset_array_actual[pset_id];
	pset->pset_cluster_type = pset_cluster_type;
	pset->pset_cluster_id = pset_cluster_id;
	pset->pset_id = pset_id;
	pset_init(pset, node);

	return pset;
}
679
680 /*
681 * Initialize the given processor_set structure.
682 */
void
pset_init(
	processor_set_t pset,
	pset_node_t node)
{
	/* Zero all bookkeeping; CPUs are added later by processor_init(). */
	pset->online_processor_count = 0;
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
	bzero(&pset->pset_runnable_depth, sizeof(pset->pset_runnable_depth));
#elif __AMP__
	pset->load_average = 0;
#endif /* !CONFIG_SCHED_EDGE && __AMP__ */
	pset->cpu_set_low = pset->cpu_set_hi = 0;
	pset->cpu_set_count = 0;
	pset->last_chosen = -1;
	pset->cpu_bitmask = 0;
	pset->recommended_bitmask = 0;
#if CONFIG_SCHED_SMT
	pset->primary_map = 0;
#endif /* CONFIG_SCHED_SMT */
	pset->realtime_map = 0;
	pset->cpu_available_map = 0;

	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
		pset->cpu_state_map[i] = 0;
	}
	pset->pending_AST_URGENT_cpu_mask = 0;
	pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	pset->pending_deferred_AST_cpu_mask = 0;
#endif
	pset->pending_spill_cpu_mask = 0;
	pset->rt_pending_spill_cpu_mask = 0;
	pset_lock_init(pset);
	pset->pset_self = IP_NULL;
	pset->pset_name_self = IP_NULL;
	pset->pset_list = PROCESSOR_SET_NULL;
#if CONFIG_SCHED_SMT
	pset->is_SMT = false;
#endif /* CONFIG_SCHED_SMT */
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
	pset->cpu_running_foreign = 0;
	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
	}
#endif /* CONFIG_SCHED_EDGE */

	/*
	 * No initial preferences or forced migrations, so use the least numbered
	 * available idle core when picking amongst idle cores in a cluster.
	 */
	pset->perfcontrol_cpu_preferred_bitmask = 0;
	pset->perfcontrol_cpu_migration_bitmask = 0;
	pset->cpu_preferred_last_chosen = -1;

	if (pset != sched_boot_pset) {
		/*
		 * Scheduler runqueue initialization for non-boot psets.
		 * This initialization for the boot pset happens in sched_init().
		 */
		SCHED(pset_init)(pset);
		SCHED(rt_init_pset)(pset);
	}

	/* Psets are initialized before any other processor starts running. */
	pset_array[pset->pset_id] = pset;

	/* Initialize pset node state regarding this pset */
	bit_set(node->pset_map, pset->pset_id);
	pset->node = node;

	/* Append to the node's singly-linked list of psets. */
	processor_set_t *prev = &node->psets;
	while (*prev != PROCESSOR_SET_NULL) {
		prev = &(*prev)->pset_list;
	}
	*prev = pset;
}
762
763 #if !SCHED_TEST_HARNESS
764
765 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)766 processor_info_count(
767 processor_flavor_t flavor,
768 mach_msg_type_number_t *count)
769 {
770 switch (flavor) {
771 case PROCESSOR_BASIC_INFO:
772 *count = PROCESSOR_BASIC_INFO_COUNT;
773 break;
774
775 case PROCESSOR_CPU_LOAD_INFO:
776 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
777 break;
778
779 default:
780 return cpu_info_count(flavor, count);
781 }
782
783 return KERN_SUCCESS;
784 }
785
/*
 * Accumulate this processor's user/system/idle time, converted to
 * scheduler ticks, into the supplied ticks[] array.  Note the `+=`:
 * the caller is expected to have zeroed (or pre-seeded) the array.
 */
void
processor_cpu_load_info(processor_t processor,
    natural_t ticks[static CPU_STATE_MAX])
{
	struct recount_usage usage = { 0 };
	uint64_t idle_time = 0;
	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);

	/* Convert mach-time usage into tick counts. */
	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_metrics[RCT_LVL_USER].rm_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_SYSTEM] += (uint32_t)(
		recount_usage_system_time_mach(&usage) / hz_tick_interval);
	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
}
800
/*
 * Mach processor_info() handler: fill `info` for the requested flavor.
 * Generic flavors (basic info, cpu load) are handled here; anything
 * else is passed through to the machine-dependent cpu_info().
 * On success, *count is set to the returned element count and *host to
 * the real host.  Returns KERN_FAILURE if *count is too small.
 */
kern_return_t
processor_info(
	processor_t processor,
	processor_flavor_t flavor,
	host_t *host,
	processor_info_t info,
	mach_msg_type_number_t *count)
{
	int cpu_id, state;
	kern_return_t result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/*
		 * Report "not running" for CPUs that are genuinely offline
		 * (not just temporarily shut down), and on x86 also for
		 * derecommended CPUs.
		 */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* processor_cpu_load_info() accumulates, so zero first. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
884
885 #endif /* !SCHED_TEST_HARNESS */
886
/*
 * pset_update_processor_state:
 *
 * Transition a processor to a new scheduler state and keep the owning
 * pset's (and, when relevant, the node's) CPU bitmaps consistent with
 * that state.  Caller must hold the pset lock.
 */
void
pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
{
	pset_assert_locked(pset);

	uint old_state = processor->state;
	uint cpuid = (uint)processor->cpu_id;

	assert(processor->processor_set == pset);
	assert(bit_test(pset->cpu_bitmask, cpuid));

	assert(old_state < PROCESSOR_STATE_LEN);
	assert(new_state < PROCESSOR_STATE_LEN);

	processor->state = new_state;

	/* Move the CPU's bit between the per-state maps. */
	bit_clear(pset->cpu_state_map[old_state], cpuid);
	bit_set(pset->cpu_state_map[new_state], cpuid);

	/* Availability tracks whether the state is at least PROCESSOR_IDLE. */
	if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) {
		/* No longer available for scheduling */
		bit_clear(pset->cpu_available_map, cpuid);
	} else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) {
		/* Newly available for scheduling */
		bit_set(pset->cpu_available_map, cpuid);
	}

	/* Entering or leaving RUNNING changes the pset's load picture. */
	if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
		SCHED(update_pset_load_average)(pset, 0);
		if (new_state == PROCESSOR_RUNNING) {
			assert(processor == current_processor());
		}
	}
	if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
		if (new_state == PROCESSOR_IDLE) {
			/* A CPU going idle is no longer running a realtime thread. */
			bit_clear(pset->realtime_map, cpuid);
		}

		pset_node_t node = pset->node;

		if (bit_count(node->pset_map) == 1) {
			/* Node has only a single pset, so skip node pset map updates */
			return;
		}

		if (new_state == PROCESSOR_IDLE) {
#if CONFIG_SCHED_SMT
			if (processor->processor_primary == processor) {
				if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
					atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
				}
			}
#endif /* CONFIG_SCHED_SMT */
			/* Record at the node level that this pset has a non-RT / idle CPU. */
			if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
			}
			if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
			}
		} else {
			cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
			if (idle_map == 0) {
				/* No more IDLE CPUs */
				if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
					atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
				}
			}
		}
	}
}
957
958 #if !SCHED_TEST_HARNESS
959
960 /*
961 * Now that we're enforcing all CPUs actually boot, we may need a way to
962 * relax the timeout.
963 */
964 TUNABLE(uint32_t, cpu_boot_timeout_secs, "cpu_boot_timeout_secs", 1); /* seconds, default to 1 second */
965
/*
 * Panic-message fragments for processor_wait_for_start(), indexed by
 * processor_start_kind_t.
 */
static const char *
processor_start_panic_strings[] = {
	[PROCESSOR_FIRST_BOOT] = "boot for the first time",
	[PROCESSOR_BEFORE_ENTERING_SLEEP] = "come online while entering system sleep",
	[PROCESSOR_WAKE_FROM_SLEEP] = "come online after returning from system sleep",
	[PROCESSOR_CLUSTER_POWERDOWN_SUSPEND] = "come online while disabling cluster powerdown",
	[PROCESSOR_CLUSTER_POWERDOWN_RESUME] = "come online before enabling cluster powerdown",
	[PROCESSOR_POWERED_CORES_CHANGE] = "come online during dynamic cluster power state change",
};
975
/*
 * processor_wait_for_start:
 *
 * Block until the given processor finishes its startup sequence
 * (processor_instartup clears), panicking if it does not come up within
 * the boot timeout.  Must be called after processor_boot() has run for
 * this processor.  start_kind selects the panic message if we time out.
 */
void
processor_wait_for_start(processor_t processor, processor_start_kind_t start_kind)
{
	if (!processor->processor_booted) {
		panic("processor_boot() missing for cpu %d", processor->cpu_id);
	}

	/* Scale the timeout when debugging slows the system down. */
	uint32_t boot_timeout_extended = cpu_boot_timeout_secs *
	    debug_cpu_performance_degradation_factor;

	spl_t s = splsched();
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	while (processor->processor_instartup) {
		/* Sleep until the starting CPU clears processor_instartup, or timeout. */
		assert_wait_timeout((event_t)&processor->processor_instartup,
		    THREAD_UNINT, boot_timeout_extended, NSEC_PER_SEC);
		simple_unlock(&processor_start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("cpu %d failed to %s, waited %d seconds\n",
			    processor->cpu_id,
			    processor_start_panic_strings[start_kind],
			    boot_timeout_extended);
		}

		s = splsched();
		simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	}

	if (processor->processor_inshutdown) {
		panic("%s>cpu %d still in shutdown",
		    __func__, processor->cpu_id);
	}

	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	if (!processor->processor_online) {
		panic("%s>cpu %d not online",
		    __func__, processor->cpu_id);
	}

	/* The first waiter advances the offline state machine to RUNNING. */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);
}
1029
/*
 * Serializes processor power transitions: held across
 * processor_dostartup() / processor_doshutdown() by the
 * processor_start_reason / processor_exit_reason / processor_boot paths.
 */
LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);
1032
/*
 * processor_dostartup:
 *
 * Drive the power-up sequence for a processor: flag it as in-startup,
 * transition it to PROCESSOR_START, power the core on, and kick
 * cpu_start().  Does not wait for the CPU to finish coming up; see
 * processor_wait_for_start().
 *
 * Caller must hold cluster_powerdown_lock and processor_updown_lock.
 */
static void
processor_dostartup(
	processor_t processor,
	bool first_boot)
{
	if (!processor->processor_booted && !first_boot) {
		panic("processor %d not booted", processor->cpu_id);
	}

	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	processor_set_t pset = processor->processor_set;

	assert(processor->processor_self);

	spl_t s = splsched();

	/* Leaving shutdown, entering startup. */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_inshutdown || first_boot);
	processor->processor_inshutdown = false;
	assert(processor->processor_instartup == false);
	processor->processor_instartup = true;
	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	pset_lock(pset);

	if (first_boot) {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);

	assert(processor->state == PROCESSOR_OFF_LINE);

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	pset_unlock(pset);

	simple_unlock(&sched_available_cores_lock);

	splx(s);

	/* Power the core and announce the pending boot before starting it. */
	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);

	cpu_start(processor->cpu_id);

	s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	/* The started CPU may already have advanced the state itself. */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTING) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTED_NOT_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);

	ml_cpu_end_state_transition(processor->cpu_id);
	/*
	 * Note: Because the actual wait-for-start happens sometime later,
	 * this races with processor_up calling CPU_BOOTED.
	 * To fix that, this should happen after the first wait for start
	 * confirms the CPU has booted.
	 */
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
}
1106
/*
 * processor_exit_reason:
 *
 * Shut down a processor, recording why in last_shutdown_reason.
 * Serialized against startup via processor_updown_lock.
 */
void
processor_exit_reason(processor_t processor, processor_reason_t reason, bool is_system_sleep)
{
	assert(processor);
	assert(processor->processor_set);

	lck_mtx_lock(&processor_updown_lock);

	/* During system sleep, only system-initiated shutdowns are legal. */
	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	/* Non-system shutdown of the boot CPU requires bootcpu shutdown support. */
	assert((processor != master_processor) || (reason == REASON_SYSTEM) || support_bootcpu_shutdown);

	processor->last_shutdown_reason = reason;

	/* The boot CPU going down for system sleep is the final sleep step. */
	bool is_final_system_sleep = is_system_sleep && (processor == master_processor);

	processor_doshutdown(processor, is_final_system_sleep);

	lck_mtx_unlock(&processor_updown_lock);
}
1129
1130 /*
1131 * Called `processor_exit` in Unsupported KPI.
1132 * AppleARMCPU and AppleACPIPlatform call this in response to haltCPU().
1133 *
1134 * Behavior change: on both platforms, now xnu does the processor_sleep,
1135 * and ignores processor_exit calls from kexts.
1136 */
1137 kern_return_t
processor_exit_from_kext(__unused processor_t processor)1138 processor_exit_from_kext(
1139 __unused processor_t processor)
1140 {
1141 /* This is a no-op now. */
1142 return KERN_FAILURE;
1143 }
1144
/*
 * processor_sleep:
 *
 * Take a processor down for system sleep.
 * Caller must hold cluster_powerdown_lock.
 */
void
processor_sleep(
	processor_t processor)
{
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);

	processor_exit_reason(processor, REASON_SYSTEM, true);
}
1153
1154 kern_return_t
processor_exit_from_user(processor_t processor)1155 processor_exit_from_user(
1156 processor_t processor)
1157 {
1158 if (processor == PROCESSOR_NULL) {
1159 return KERN_INVALID_ARGUMENT;
1160 }
1161
1162 kern_return_t result;
1163
1164 lck_mtx_lock(&cluster_powerdown_lock);
1165
1166 result = sched_processor_exit_user(processor);
1167
1168 lck_mtx_unlock(&cluster_powerdown_lock);
1169
1170 return result;
1171 }
1172
/*
 * processor_start_reason:
 *
 * Start a previously booted processor, recording why in
 * last_startup_reason.  Serialized against shutdown via
 * processor_updown_lock.
 */
void
processor_start_reason(processor_t processor, processor_reason_t reason)
{
	lck_mtx_lock(&processor_updown_lock);

	assert(processor);
	assert(processor->processor_set);
	assert(processor->processor_booted);

	/* During system sleep, only system-initiated startups are legal. */
	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	processor->last_startup_reason = reason;

	/* Not a first boot: processor_boot() already ran for this CPU. */
	processor_dostartup(processor, false);

	lck_mtx_unlock(&processor_updown_lock);
}
1192
1193 /*
1194 * Called `processor_start` in Unsupported KPI.
1195 * AppleARMCPU calls this to boot processors.
1196 * AppleACPIPlatform expects ml_processor_register to call processor_boot.
1197 *
1198 * Behavior change: now ml_processor_register also boots CPUs on ARM, and xnu
1199 * ignores processor_start calls from kexts.
1200 */
1201 kern_return_t
processor_start_from_kext(__unused processor_t processor)1202 processor_start_from_kext(
1203 __unused processor_t processor)
1204 {
1205 /* This is a no-op now. */
1206 return KERN_FAILURE;
1207 }
1208
1209 kern_return_t
processor_start_from_user(processor_t processor)1210 processor_start_from_user(
1211 processor_t processor)
1212 {
1213 if (processor == PROCESSOR_NULL) {
1214 return KERN_INVALID_ARGUMENT;
1215 }
1216
1217 kern_return_t result;
1218
1219 lck_mtx_lock(&cluster_powerdown_lock);
1220
1221 result = sched_processor_start_user(processor);
1222
1223 lck_mtx_unlock(&cluster_powerdown_lock);
1224
1225 return result;
1226 }
1227
1228 /*
1229 * Boot up a processor for the first time.
1230 *
1231 * This will also be called against the main processor during system boot,
1232 * even though it's already running.
1233 */
1234 void
processor_boot(processor_t processor)1235 processor_boot(
1236 processor_t processor)
1237 {
1238 lck_mtx_lock(&cluster_powerdown_lock);
1239 lck_mtx_lock(&processor_updown_lock);
1240
1241 assert(!sched_is_in_sleep());
1242 assert(!sched_is_cpu_init_completed());
1243
1244 if (processor->processor_booted) {
1245 panic("processor %d already booted", processor->cpu_id);
1246 }
1247
1248 if (processor == master_processor) {
1249 assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
1250 } else {
1251 assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
1252 }
1253
1254 /*
1255 * Create the idle processor thread.
1256 */
1257 if (processor->idle_thread == THREAD_NULL) {
1258 idle_thread_create(processor, processor_start_thread);
1259 }
1260
1261 if (processor->processor_self == IP_NULL) {
1262 ipc_processor_init(processor);
1263 }
1264
1265 if (processor == master_processor) {
1266 processor->last_startup_reason = REASON_SYSTEM;
1267
1268 ml_cpu_power_enable(processor->cpu_id);
1269
1270 processor_t prev = thread_bind(processor);
1271 thread_block(THREAD_CONTINUE_NULL);
1272
1273 cpu_start(processor->cpu_id);
1274
1275 assert(processor->state == PROCESSOR_RUNNING);
1276 processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);
1277
1278 thread_bind(prev);
1279 } else {
1280 processor->last_startup_reason = REASON_SYSTEM;
1281
1282 /*
1283 * We don't wait for startup to finish, so all CPUs can start
1284 * in parallel.
1285 */
1286 processor_dostartup(processor, true);
1287 }
1288
1289 processor->processor_booted = true;
1290
1291 lck_mtx_unlock(&processor_updown_lock);
1292 lck_mtx_unlock(&cluster_powerdown_lock);
1293 }
1294
1295 /*
1296 * Wake a previously booted processor from a temporarily powered off state.
1297 */
1298 void
processor_wake(processor_t processor)1299 processor_wake(
1300 processor_t processor)
1301 {
1302 lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
1303
1304 assert(processor->processor_booted);
1305 processor_start_reason(processor, REASON_SYSTEM);
1306 }
1307
1308 #if CONFIG_SCHED_SMT
/*
 * enable_smt_processors:
 *
 * Bring every secondary (SMT sibling) logical CPU online or offline,
 * then verify via host_info() that the resulting logical CPU count
 * matches the requested state.  Returns KERN_INVALID_ARGUMENT on
 * non-SMT systems, KERN_FAILURE if the counts don't line up afterwards.
 */
kern_return_t
enable_smt_processors(bool enable)
{
	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
		/* Not an SMT system */
		return KERN_INVALID_ARGUMENT;
	}

	int ncpus = machine_info.logical_cpu_max;

	/*
	 * NOTE(review): per-CPU start/exit return values are ignored here;
	 * overall success is checked against the host_info() counts below.
	 */
	for (int i = 1; i < ncpus; i++) {
		processor_t processor = processor_array[i];

		if (processor->processor_primary != processor) {
			if (enable) {
				processor_start_from_user(processor);
			} else { /* Disable */
				processor_exit_from_user(processor);
			}
		}
	}

#define BSD_HOST 1
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	if (kret != KERN_SUCCESS) {
		return kret;
	}

	/* After enabling, all logical CPUs should be up. */
	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
		return KERN_FAILURE;
	}

	/* After disabling, only one logical CPU per physical core should be up. */
	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
1349 #endif /* CONFIG_SCHED_SMT */
1350
1351 bool
processor_should_kprintf(processor_t processor,bool starting)1352 processor_should_kprintf(processor_t processor, bool starting)
1353 {
1354 processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1355
1356 return reason != REASON_CLPC_SYSTEM;
1357 }
1358
1359 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1360 processor_control(
1361 processor_t processor,
1362 processor_info_t info,
1363 mach_msg_type_number_t count)
1364 {
1365 if (processor == PROCESSOR_NULL) {
1366 return KERN_INVALID_ARGUMENT;
1367 }
1368
1369 return cpu_control(processor->cpu_id, info, count);
1370 }
1371
1372 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1373 processor_get_assignment(
1374 processor_t processor,
1375 processor_set_t *pset)
1376 {
1377 int state;
1378
1379 if (processor == PROCESSOR_NULL) {
1380 return KERN_INVALID_ARGUMENT;
1381 }
1382
1383 state = processor->state;
1384 if (state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1385 return KERN_FAILURE;
1386 }
1387
1388 *pset = sched_boot_pset;
1389
1390 return KERN_SUCCESS;
1391 }
1392
1393 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1394 processor_set_info(
1395 processor_set_t pset,
1396 int flavor,
1397 host_t *host,
1398 processor_set_info_t info,
1399 mach_msg_type_number_t *count)
1400 {
1401 if (pset == PROCESSOR_SET_NULL) {
1402 return KERN_INVALID_ARGUMENT;
1403 }
1404
1405 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1406 processor_set_basic_info_t basic_info;
1407
1408 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1409 return KERN_FAILURE;
1410 }
1411
1412 basic_info = (processor_set_basic_info_t) info;
1413 #if defined(__x86_64__)
1414 basic_info->processor_count = processor_avail_count_user;
1415 #else
1416 basic_info->processor_count = processor_avail_count;
1417 #endif
1418 basic_info->default_policy = POLICY_TIMESHARE;
1419
1420 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1421 *host = &realhost;
1422 return KERN_SUCCESS;
1423 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1424 policy_timeshare_base_t ts_base;
1425
1426 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1427 return KERN_FAILURE;
1428 }
1429
1430 ts_base = (policy_timeshare_base_t) info;
1431 ts_base->base_priority = BASEPRI_DEFAULT;
1432
1433 *count = POLICY_TIMESHARE_BASE_COUNT;
1434 *host = &realhost;
1435 return KERN_SUCCESS;
1436 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1437 policy_fifo_base_t fifo_base;
1438
1439 if (*count < POLICY_FIFO_BASE_COUNT) {
1440 return KERN_FAILURE;
1441 }
1442
1443 fifo_base = (policy_fifo_base_t) info;
1444 fifo_base->base_priority = BASEPRI_DEFAULT;
1445
1446 *count = POLICY_FIFO_BASE_COUNT;
1447 *host = &realhost;
1448 return KERN_SUCCESS;
1449 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1450 policy_rr_base_t rr_base;
1451
1452 if (*count < POLICY_RR_BASE_COUNT) {
1453 return KERN_FAILURE;
1454 }
1455
1456 rr_base = (policy_rr_base_t) info;
1457 rr_base->base_priority = BASEPRI_DEFAULT;
1458 rr_base->quantum = 1;
1459
1460 *count = POLICY_RR_BASE_COUNT;
1461 *host = &realhost;
1462 return KERN_SUCCESS;
1463 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1464 policy_timeshare_limit_t ts_limit;
1465
1466 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1467 return KERN_FAILURE;
1468 }
1469
1470 ts_limit = (policy_timeshare_limit_t) info;
1471 ts_limit->max_priority = MAXPRI_KERNEL;
1472
1473 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1474 *host = &realhost;
1475 return KERN_SUCCESS;
1476 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1477 policy_fifo_limit_t fifo_limit;
1478
1479 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1480 return KERN_FAILURE;
1481 }
1482
1483 fifo_limit = (policy_fifo_limit_t) info;
1484 fifo_limit->max_priority = MAXPRI_KERNEL;
1485
1486 *count = POLICY_FIFO_LIMIT_COUNT;
1487 *host = &realhost;
1488 return KERN_SUCCESS;
1489 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1490 policy_rr_limit_t rr_limit;
1491
1492 if (*count < POLICY_RR_LIMIT_COUNT) {
1493 return KERN_FAILURE;
1494 }
1495
1496 rr_limit = (policy_rr_limit_t) info;
1497 rr_limit->max_priority = MAXPRI_KERNEL;
1498
1499 *count = POLICY_RR_LIMIT_COUNT;
1500 *host = &realhost;
1501 return KERN_SUCCESS;
1502 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1503 int *enabled;
1504
1505 if (*count < (sizeof(*enabled) / sizeof(int))) {
1506 return KERN_FAILURE;
1507 }
1508
1509 enabled = (int *) info;
1510 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1511
1512 *count = sizeof(*enabled) / sizeof(int);
1513 *host = &realhost;
1514 return KERN_SUCCESS;
1515 }
1516
1517
1518 *host = HOST_NULL;
1519 return KERN_INVALID_ARGUMENT;
1520 }
1521
1522 /*
1523 * processor_set_statistics
1524 *
1525 * Returns scheduling statistics for a processor set.
1526 */
1527 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1528 processor_set_statistics(
1529 processor_set_t pset,
1530 int flavor,
1531 processor_set_info_t info,
1532 mach_msg_type_number_t *count)
1533 {
1534 if (pset == PROCESSOR_SET_NULL || pset != sched_boot_pset) {
1535 return KERN_INVALID_PROCESSOR_SET;
1536 }
1537
1538 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1539 processor_set_load_info_t load_info;
1540
1541 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1542 return KERN_FAILURE;
1543 }
1544
1545 load_info = (processor_set_load_info_t) info;
1546
1547 load_info->mach_factor = sched_mach_factor;
1548 load_info->load_average = sched_load_average;
1549
1550 load_info->task_count = tasks_count;
1551 load_info->thread_count = threads_count;
1552
1553 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1554 return KERN_SUCCESS;
1555 }
1556
1557 return KERN_INVALID_ARGUMENT;
1558 }
1559
1560 /*
1561 * processor_set_things:
1562 *
1563 * Common internals for processor_set_{threads,tasks}
1564 */
1565 static kern_return_t
processor_set_things(processor_set_t pset,mach_port_array_t * thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1566 processor_set_things(
1567 processor_set_t pset,
1568 mach_port_array_t *thing_list,
1569 mach_msg_type_number_t *countp,
1570 int type,
1571 mach_task_flavor_t flavor)
1572 {
1573 unsigned int i;
1574 task_t task;
1575 thread_t thread;
1576
1577 mach_port_array_t task_addr;
1578 task_t *task_list;
1579 vm_size_t actual_tasks, task_count_cur, task_count_needed;
1580
1581 mach_port_array_t thread_addr;
1582 thread_t *thread_list;
1583 vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1584
1585 mach_port_array_t addr, newaddr;
1586 vm_size_t count, count_needed;
1587
1588 if (pset == PROCESSOR_SET_NULL || pset != sched_boot_pset) {
1589 return KERN_INVALID_ARGUMENT;
1590 }
1591
1592 task_count_cur = 0;
1593 task_count_needed = 0;
1594 task_list = NULL;
1595 task_addr = NULL;
1596 actual_tasks = 0;
1597
1598 thread_count_cur = 0;
1599 thread_count_needed = 0;
1600 thread_list = NULL;
1601 thread_addr = NULL;
1602 actual_threads = 0;
1603
1604 for (;;) {
1605 lck_mtx_lock(&tasks_threads_lock);
1606
1607 /* do we have the memory we need? */
1608 if (type == PSET_THING_THREAD) {
1609 thread_count_needed = threads_count;
1610 }
1611 #if !CONFIG_MACF
1612 else
1613 #endif
1614 task_count_needed = tasks_count;
1615
1616 if (task_count_needed <= task_count_cur &&
1617 thread_count_needed <= thread_count_cur) {
1618 break;
1619 }
1620
1621 /* unlock and allocate more memory */
1622 lck_mtx_unlock(&tasks_threads_lock);
1623
1624 /* grow task array */
1625 if (task_count_needed > task_count_cur) {
1626 mach_port_array_free(task_addr, task_count_cur);
1627 assert(task_count_needed > 0);
1628 task_count_cur = task_count_needed;
1629
1630 task_addr = mach_port_array_alloc(task_count_cur,
1631 Z_WAITOK | Z_ZERO);
1632 if (task_addr == NULL) {
1633 mach_port_array_free(thread_addr, thread_count_cur);
1634 return KERN_RESOURCE_SHORTAGE;
1635 }
1636 task_list = (task_t *)task_addr;
1637 }
1638
1639 /* grow thread array */
1640 if (thread_count_needed > thread_count_cur) {
1641 mach_port_array_free(thread_addr, thread_count_cur);
1642 assert(thread_count_needed > 0);
1643 thread_count_cur = thread_count_needed;
1644
1645 thread_addr = mach_port_array_alloc(thread_count_cur,
1646 Z_WAITOK | Z_ZERO);
1647 if (thread_addr == NULL) {
1648 mach_port_array_free(task_addr, task_count_cur);
1649 return KERN_RESOURCE_SHORTAGE;
1650 }
1651 thread_list = (thread_t *)thread_addr;
1652 }
1653 }
1654
1655 /* OK, have memory and the list locked */
1656
1657 /* If we need it, get the thread list */
1658 if (type == PSET_THING_THREAD) {
1659 queue_iterate(&threads, thread, thread_t, threads) {
1660 task = get_threadtask(thread);
1661 #if defined(SECURE_KERNEL)
1662 if (task == kernel_task) {
1663 /* skip threads belonging to kernel_task */
1664 continue;
1665 }
1666 #endif
1667 if (!task->ipc_active || task_is_exec_copy(task)) {
1668 /* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
1669 continue;
1670 }
1671
1672 thread_reference(thread);
1673 thread_list[actual_threads++] = thread;
1674 }
1675 }
1676 #if !CONFIG_MACF
1677 else
1678 #endif
1679 {
1680 /* get a list of the tasks */
1681 queue_iterate(&tasks, task, task_t, tasks) {
1682 #if defined(SECURE_KERNEL)
1683 if (task == kernel_task) {
1684 /* skip kernel_task */
1685 continue;
1686 }
1687 #endif
1688 if (!task->ipc_active || task_is_exec_copy(task)) {
1689 /* skip inactive tasks (in the middle of exec/fork/spawn) */
1690 continue;
1691 }
1692
1693 task_reference(task);
1694 task_list[actual_tasks++] = task;
1695 }
1696 }
1697
1698 lck_mtx_unlock(&tasks_threads_lock);
1699
1700 #if CONFIG_MACF
1701 unsigned int j, used;
1702
1703 /* for each task, make sure we are allowed to examine it */
1704 for (i = used = 0; i < actual_tasks; i++) {
1705 if (mac_task_check_expose_task(task_list[i], flavor)) {
1706 task_deallocate(task_list[i]);
1707 continue;
1708 }
1709 task_list[used++] = task_list[i];
1710 }
1711 actual_tasks = used;
1712 task_count_needed = actual_tasks;
1713
1714 if (type == PSET_THING_THREAD) {
1715 /* for each thread (if any), make sure it's task is in the allowed list */
1716 for (i = used = 0; i < actual_threads; i++) {
1717 boolean_t found_task = FALSE;
1718
1719 task = get_threadtask(thread_list[i]);
1720 for (j = 0; j < actual_tasks; j++) {
1721 if (task_list[j] == task) {
1722 found_task = TRUE;
1723 break;
1724 }
1725 }
1726 if (found_task) {
1727 thread_list[used++] = thread_list[i];
1728 } else {
1729 thread_deallocate(thread_list[i]);
1730 }
1731 }
1732 actual_threads = used;
1733 thread_count_needed = actual_threads;
1734
1735 /* done with the task list */
1736 for (i = 0; i < actual_tasks; i++) {
1737 task_deallocate(task_list[i]);
1738 }
1739 mach_port_array_free(task_addr, task_count_cur);
1740 task_list = NULL;
1741 task_count_cur = 0;
1742 actual_tasks = 0;
1743 }
1744 #endif
1745
1746 if (type == PSET_THING_THREAD) {
1747 if (actual_threads == 0) {
1748 /* no threads available to return */
1749 assert(task_count_cur == 0);
1750 mach_port_array_free(thread_addr, thread_count_cur);
1751 thread_list = NULL;
1752 *thing_list = NULL;
1753 *countp = 0;
1754 return KERN_SUCCESS;
1755 }
1756 count_needed = actual_threads;
1757 count = thread_count_cur;
1758 addr = thread_addr;
1759 } else {
1760 if (actual_tasks == 0) {
1761 /* no tasks available to return */
1762 assert(thread_count_cur == 0);
1763 mach_port_array_free(task_addr, task_count_cur);
1764 *thing_list = NULL;
1765 *countp = 0;
1766 return KERN_SUCCESS;
1767 }
1768 count_needed = actual_tasks;
1769 count = task_count_cur;
1770 addr = task_addr;
1771 }
1772
1773 /* if we allocated too much, must copy */
1774 if (count_needed < count) {
1775 newaddr = mach_port_array_alloc(count_needed, Z_WAITOK | Z_ZERO);
1776 if (newaddr == NULL) {
1777 for (i = 0; i < actual_tasks; i++) {
1778 if (type == PSET_THING_THREAD) {
1779 thread_deallocate(thread_list[i]);
1780 } else {
1781 task_deallocate(task_list[i]);
1782 }
1783 }
1784 mach_port_array_free(addr, count);
1785 return KERN_RESOURCE_SHORTAGE;
1786 }
1787
1788 bcopy(addr, newaddr, count_needed * sizeof(void *));
1789 mach_port_array_free(addr, count);
1790
1791 addr = newaddr;
1792 count = count_needed;
1793 }
1794
1795 *thing_list = addr;
1796 *countp = (mach_msg_type_number_t)count;
1797
1798 return KERN_SUCCESS;
1799 }
1800
1801 /*
1802 * processor_set_tasks:
1803 *
1804 * List all tasks in the processor set.
1805 */
1806 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1807 processor_set_tasks_internal(
1808 processor_set_t pset,
1809 task_array_t *task_list,
1810 mach_msg_type_number_t *count,
1811 mach_task_flavor_t flavor)
1812 {
1813 kern_return_t ret;
1814
1815 ret = processor_set_things(pset, task_list, count, PSET_THING_TASK, flavor);
1816 if (ret != KERN_SUCCESS) {
1817 return ret;
1818 }
1819
1820 /* do the conversion that Mig should handle */
1821 convert_task_array_to_ports(*task_list, *count, flavor);
1822 return KERN_SUCCESS;
1823 }
1824
1825 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1826 processor_set_tasks(
1827 processor_set_t pset,
1828 task_array_t *task_list,
1829 mach_msg_type_number_t *count)
1830 {
1831 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1832 }
1833
1834 /*
1835 * processor_set_tasks_with_flavor:
1836 *
1837 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1838 */
1839 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1840 processor_set_tasks_with_flavor(
1841 processor_set_t pset,
1842 mach_task_flavor_t flavor,
1843 task_array_t *task_list,
1844 mach_msg_type_number_t *count)
1845 {
1846 switch (flavor) {
1847 case TASK_FLAVOR_CONTROL:
1848 case TASK_FLAVOR_READ:
1849 case TASK_FLAVOR_INSPECT:
1850 case TASK_FLAVOR_NAME:
1851 return processor_set_tasks_internal(pset, task_list, count, flavor);
1852 default:
1853 return KERN_INVALID_ARGUMENT;
1854 }
1855 }
1856
1857 /*
1858 * processor_set_threads:
1859 *
1860 * List all threads in the processor set.
1861 */
1862 #if defined(SECURE_KERNEL)
1863 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1864 processor_set_threads(
1865 __unused processor_set_t pset,
1866 __unused thread_act_array_t *thread_list,
1867 __unused mach_msg_type_number_t *count)
1868 {
1869 return KERN_FAILURE;
1870 }
1871 #elif !defined(XNU_TARGET_OS_OSX)
1872 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1873 processor_set_threads(
1874 __unused processor_set_t pset,
1875 __unused thread_act_array_t *thread_list,
1876 __unused mach_msg_type_number_t *count)
1877 {
1878 return KERN_NOT_SUPPORTED;
1879 }
1880 #else
1881 kern_return_t
processor_set_threads(processor_set_t pset,thread_act_array_t * thread_list,mach_msg_type_number_t * count)1882 processor_set_threads(
1883 processor_set_t pset,
1884 thread_act_array_t *thread_list,
1885 mach_msg_type_number_t *count)
1886 {
1887 kern_return_t ret;
1888
1889 ret = processor_set_things(pset, thread_list, count,
1890 PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1891 if (ret != KERN_SUCCESS) {
1892 return ret;
1893 }
1894
1895 /* do the conversion that Mig should handle */
1896 convert_thread_array_to_ports(*thread_list, *count, TASK_FLAVOR_CONTROL);
1897 return KERN_SUCCESS;
1898 }
1899 #endif
1900
1901 #endif /* !SCHED_TEST_HARNESS */
1902
/*
 * recommended_pset_type:
 *
 * Return the cluster type (E or P) a thread should run on under the
 * AMP scheduler policy.  On non-AMP configurations, always PSET_SMP.
 */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
	/* Only used by the AMP scheduler policy */
#if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	/* Debug override: honor an explicitly set per-task pset hint. */
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* A cluster binding overrides policy-based recommendations. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/* BG / utility threads may be restricted to E-cores by perfctl policy. */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise follow the thread group's recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel threads go to E, others to P. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
	(void)thread;
	return PSET_SMP;
#endif /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
}
1961
1962 #if __arm64__
1963
1964 cluster_type_t
pset_cluster_type_to_cluster_type(pset_cluster_type_t pset_cluster_type)1965 pset_cluster_type_to_cluster_type(pset_cluster_type_t pset_cluster_type)
1966 {
1967 switch (pset_cluster_type) {
1968 #if __AMP__
1969 case PSET_AMP_E:
1970 return CLUSTER_TYPE_E;
1971 case PSET_AMP_P:
1972 return CLUSTER_TYPE_P;
1973 #endif /* __AMP__ */
1974 case PSET_SMP:
1975 return CLUSTER_TYPE_SMP;
1976 default:
1977 panic("Unexpected pset cluster type %d", pset_cluster_type);
1978 }
1979 }
1980
1981 pset_cluster_type_t
cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)1982 cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)
1983 {
1984 switch (cluster_type) {
1985 #if __AMP__
1986 case CLUSTER_TYPE_E:
1987 return PSET_AMP_E;
1988 case CLUSTER_TYPE_P:
1989 return PSET_AMP_P;
1990 #endif /* __AMP__ */
1991 case CLUSTER_TYPE_SMP:
1992 return PSET_SMP;
1993 default:
1994 panic("Unexpected cluster type %d", cluster_type);
1995 }
1996 }
1997
1998 #endif /* __arm64__ */
1999
2000 #if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE
2001
2002 void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class,boolean_t inherit)2003 sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
2004 {
2005 sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;
2006
2007 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);
2008
2009 switch (perfctl_class) {
2010 case PERFCONTROL_CLASS_UTILITY:
2011 os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
2012 break;
2013 case PERFCONTROL_CLASS_BACKGROUND:
2014 os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
2015 break;
2016 default:
2017 panic("perfctl_class invalid");
2018 break;
2019 }
2020 }
2021
2022 #elif defined(__arm64__)
2023
/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
	/*
	 * Intentionally empty: on arm64 configurations without
	 * CONFIG_THREAD_GROUPS + __AMP__ (or with CONFIG_SCHED_EDGE) there is
	 * no per-class E-restrict policy to update, but the exported symbol
	 * must still resolve for external callers.
	 */
}
2029
2030 #endif /* defined(__arm64__) */
2031