1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84 #include <machine/commpage.h>
85
86 #include <security/mac_mach_internal.h>
87
88 #if defined(CONFIG_XNUPOST)
89
90 #include <tests/xnupost.h>
91
92 #endif /* CONFIG_XNUPOST */
93
94 /*
95 * Exported interface
96 */
97 #include <mach/mach_host_server.h>
98 #include <mach/processor_set_server.h>
99 #include <san/kcov.h>
100
/* The boot pset and pset node */
struct processor_set pset0;
struct pset_node pset_node0;

#if __AMP__
/* Additional AMP node */
static struct pset_node pset_node1;
/*
 * For AMP platforms, all clusters of the same type are part of
 * the same pset_node. This allows for easier CPU selection logic.
 */
pset_node_t ecore_node;
pset_node_t pcore_node;
#endif /* __AMP__ */

/* Protects the pset list hanging off each pset_node (see pset_create/pset_find). */
LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);

LCK_GRP_DECLARE(pset_lck_grp, "pset");

/* Global task/thread bookkeeping queues and their counts. */
queue_head_t tasks;
queue_head_t terminated_tasks;  /* To be used ONLY for stackshot. */
queue_head_t corpse_tasks;
int tasks_count;
int terminated_tasks_count;
queue_head_t threads;
queue_head_t terminated_threads;
int threads_count;
int terminated_threads_count;
LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

/* Singly-linked list of all processors, appended to in processor_init(). */
processor_t processor_list;
unsigned int processor_count;
static processor_t processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
SIMPLE_LOCK_DECLARE(processor_start_state_lock, 0);

uint32_t processor_avail_count;
uint32_t processor_avail_count_user;
#if CONFIG_SCHED_SMT
uint32_t primary_processor_avail_count_user;
#endif /* CONFIG_SCHED_SMT */

#if XNU_SUPPORT_BOOTCPU_SHUTDOWN
TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", true);
#else
TUNABLE(bool, support_bootcpu_shutdown, "support_bootcpu_shutdown", false);
#endif

#if __x86_64__ || XNU_ENABLE_PROCESSOR_EXIT
TUNABLE(bool, enable_processor_exit, "processor_exit", true);
#else
TUNABLE(bool, enable_processor_exit, "processor_exit", false);
#endif

SECURITY_READ_ONLY_LATE(int) master_cpu = 0;

/* Per-cpu processor structures plus O(1) lookup tables by cpu/pset id. */
struct processor PERCPU_DATA(processor);
processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
processor_set_t pset_array[MAX_PSETS] = { 0 };

/* One expiry handler per running-timer slot; indexed by running_timer_t. */
static timer_call_func_t running_timer_funcs[] = {
	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
	[RUNNING_TIMER_PREEMPT] = thread_preempt_expire,
	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
171
172 #if defined(CONFIG_XNUPOST)
173 kern_return_t ipi_test(void);
174 extern void arm64_ipi_test(void);
175
176 kern_return_t
ipi_test()177 ipi_test()
178 {
179 #if __arm64__
180 processor_t p;
181
182 for (p = processor_list; p != NULL; p = p->processor_list) {
183 thread_bind(p);
184 thread_block(THREAD_CONTINUE_NULL);
185 kprintf("Running IPI test on cpu %d\n", p->cpu_id);
186 arm64_ipi_test();
187 }
188
189 /* unbind thread from specific cpu */
190 thread_bind(PROCESSOR_NULL);
191 thread_block(THREAD_CONTINUE_NULL);
192
193 T_PASS("Done running IPI tests");
194 #else
195 T_PASS("Unsupported platform. Not running IPI tests");
196
197 #endif /* __arm64__ */
198
199 return KERN_SUCCESS;
200 }
201 #endif /* defined(CONFIG_XNUPOST) */
202
/* NOTE(review): presumably gates scheduling on SMT secondaries — confirm against consumers. */
int sched_enable_smt = 1;

/* Per-offline-state bitmap of cpu ids; kept in sync by processor_update_offline_state_locked(). */
cpumap_t processor_offline_state_map[PROCESSOR_OFFLINE_MAX];
206
207 void
processor_update_offline_state_locked(processor_t processor,processor_offline_state_t new_state)208 processor_update_offline_state_locked(processor_t processor,
209 processor_offline_state_t new_state)
210 {
211 simple_lock_assert(&sched_available_cores_lock, LCK_ASSERT_OWNED);
212
213 processor_offline_state_t old_state = processor->processor_offline_state;
214
215 uint cpuid = (uint)processor->cpu_id;
216
217 assert(old_state < PROCESSOR_OFFLINE_MAX);
218 assert(new_state < PROCESSOR_OFFLINE_MAX);
219
220 processor->processor_offline_state = new_state;
221
222 bit_clear(processor_offline_state_map[old_state], cpuid);
223 bit_set(processor_offline_state_map[new_state], cpuid);
224 }
225
226 void
processor_update_offline_state(processor_t processor,processor_offline_state_t new_state)227 processor_update_offline_state(processor_t processor,
228 processor_offline_state_t new_state)
229 {
230 spl_t s = splsched();
231 simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
232 processor_update_offline_state_locked(processor, new_state);
233 simple_unlock(&sched_available_cores_lock);
234 splx(s);
235 }
236
/*
 * One-time boot-path setup: initialize the scheduler core locks, the
 * boot pset/pset_node, the global task/thread queues, and finally the
 * master processor itself.  Must run before any other processor or
 * pset is registered.
 */
void
processor_bootstrap(void)
{
	simple_lock_init(&sched_available_cores_lock, 0);
	simple_lock_init(&processor_start_state_lock, 0);

	/* Initialize boot pset node */
	pset_node0.psets = &pset0;
	pset_node0.pset_cluster_type = PSET_SMP;

#if __AMP__
	const ml_topology_info_t *topology_info = ml_get_topology_info();

	/*
	 * Continue initializing boot pset and node.
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset0.pset_id = boot_cluster->cluster_id;
	pset0.pset_cluster_id = boot_cluster->cluster_id;
	pset_cluster_type_t boot_type = cluster_type_to_pset_cluster_type(boot_cluster->cluster_type);
	pset0.pset_cluster_type = boot_type;
	pset_node0.pset_cluster_type = boot_type;

	/*
	 * Initialize pset node pointers according to their type:
	 * whichever type the boot cluster is gets pset_node0, the
	 * other type gets the secondary static node pset_node1.
	 */
	switch (boot_type) {
	case PSET_AMP_P:
		pcore_node = &pset_node0;
		ecore_node = &pset_node1;
		break;
	case PSET_AMP_E:
		ecore_node = &pset_node0;
		pcore_node = &pset_node1;
		break;
	default:
		panic("Unexpected boot pset cluster type %d", boot_type);
	}
	ecore_node->pset_cluster_type = PSET_AMP_E;
	pcore_node->pset_cluster_type = PSET_AMP_P;

	/* Link pset_node1 to pset_node0 */
	pset_node0.node_list = &pset_node1;
#endif /* __AMP__ */

	/* Node topology must be in place before pset_init() publishes pset0. */
	pset_init(&pset0, &pset_node0);
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);

	processor_init(master_processor, master_cpu, &pset0);
}
291
292 /*
293 * Initialize the given processor for the cpu
294 * indicated by cpu_id, and assign to the
295 * specified processor set.
296 */
/*
 * Initialize 'processor' as the processor for cpu 'cpu_id' and attach
 * it to 'pset'.  Runs once per cpu during bringup; the processor
 * starts life OFF_LINE / NOT_BOOTED (except the master, which is
 * immediately marked STARTING).  Also appends the processor to the
 * global processor_list and publishes it in processor_array[].
 */
void
processor_init(
	processor_t processor,
	int cpu_id,
	processor_set_t pset)
{
	spl_t s;

	assert(cpu_id < MAX_SCHED_CPUS);
	processor->cpu_id = cpu_id;

	if (processor != master_processor) {
		/* Scheduler state for master_processor initialized in sched_init() */
		SCHED(processor_init)(processor);
		smr_cpu_init(processor);
	}

	/* Establish the idle/offline defaults for every per-processor field. */
	processor->state = PROCESSOR_OFF_LINE;
	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
	processor->processor_set = pset;
	processor_state_update_idle(processor);
	processor->starting_pri = MINPRI;
	processor->quantum_end = UINT64_MAX;
	processor->deadline = UINT64_MAX;
	processor->first_timeslice = FALSE;
	processor->processor_online = false;
#if CONFIG_SCHED_SMT
	processor->processor_primary = processor; /* no SMT relationship known at this point */
	processor->processor_secondary = NULL;
	processor->is_SMT = false;
#endif /* CONFIG_SCHED_SMT */
	processor->processor_self = IP_NULL;
	processor->processor_list = NULL;
	processor->must_idle = false;
	processor->next_idle_short = false;
	processor->last_startup_reason = REASON_SYSTEM;
	processor->last_shutdown_reason = REASON_NONE;
	processor->shutdown_temporary = false;
	processor->processor_inshutdown = false;
	processor->processor_instartup = false;
	processor->last_derecommend_reason = REASON_NONE;
	processor->running_timers_active = false;
	/* Arm each running-timer slot with its handler, initially cleared. */
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		timer_call_setup(&processor->running_timers[i],
		    running_timer_funcs[i], processor);
		running_timer_clear(processor, i);
	}
	recount_processor_init(processor);

	/* Lock order: splsched, then sched_available_cores_lock, then pset lock. */
	s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	pset_lock(pset);
	bit_set(pset->cpu_bitmask, cpu_id);
	bit_set(pset->recommended_bitmask, cpu_id);
	atomic_bit_set(&pset->node->pset_recommended_map, pset->pset_id, memory_order_relaxed);
#if CONFIG_SCHED_SMT
	bit_set(pset->primary_map, cpu_id);
#endif /* CONFIG_SCHED_SMT */
	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
	/* Track the lowest/highest cpu id ever assigned to this pset. */
	if (pset->cpu_set_count++ == 0) {
		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
	} else {
		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
	}

	processor->last_recommend_reason = REASON_SYSTEM;
	sched_processor_change_mode_locked(processor, PCM_RECOMMENDED, true);
	pset_unlock(pset);

	/* Direct store (not the helper): this is the cpu's first state, no old bit to clear. */
	processor->processor_offline_state = PROCESSOR_OFFLINE_NOT_BOOTED;
	bit_set(processor_offline_state_map[processor->processor_offline_state], cpu_id);

	if (processor == master_processor) {
		/* The boot cpu is already being brought up by the time we get here. */
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);

	/* Append to the global singly-linked processor list. */
	simple_lock(&processor_list_lock, LCK_GRP_NULL);
	if (processor_list == NULL) {
		processor_list = processor;
	} else {
		processor_list_tail->processor_list = processor;
	}
	processor_list_tail = processor;
	processor_count++;
	simple_unlock(&processor_list_lock);
	processor_array[cpu_id] = processor;
}
389
#if CONFIG_SCHED_SMT
/* Set once any SMT sibling pair is registered (see processor_set_primary()). */
bool system_is_SMT = false;
392
393 void
processor_set_primary(processor_t processor,processor_t primary)394 processor_set_primary(
395 processor_t processor,
396 processor_t primary)
397 {
398 assert(processor->processor_primary == primary || processor->processor_primary == processor);
399 /* Re-adjust primary point for this (possibly) secondary processor */
400 processor->processor_primary = primary;
401
402 assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
403 if (primary != processor) {
404 /* Link primary to secondary, assumes a 2-way SMT model
405 * We'll need to move to a queue if any future architecture
406 * requires otherwise.
407 */
408 assert(processor->processor_secondary == NULL);
409 primary->processor_secondary = processor;
410 /* Mark both processors as SMT siblings */
411 primary->is_SMT = TRUE;
412 processor->is_SMT = TRUE;
413
414 if (!system_is_SMT) {
415 system_is_SMT = true;
416 sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
417 }
418
419 processor_set_t pset = processor->processor_set;
420 spl_t s = splsched();
421 pset_lock(pset);
422 if (!pset->is_SMT) {
423 pset->is_SMT = true;
424 }
425 bit_clear(pset->primary_map, processor->cpu_id);
426 pset_unlock(pset);
427 splx(s);
428 }
429 }
430 #endif /* CONFIG_SCHED_SMT */
431
432 processor_set_t
processor_pset(processor_t processor)433 processor_pset(
434 processor_t processor)
435 {
436 return processor->processor_set;
437 }
438
439 #if CONFIG_SCHED_EDGE
440
441 /* Returns the scheduling type for the pset */
442 cluster_type_t
pset_type_for_id(uint32_t cluster_id)443 pset_type_for_id(uint32_t cluster_id)
444 {
445 return pset_array[cluster_id]->pset_type;
446 }
447
448 /*
449 * Processor foreign threads
450 *
451 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
452 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
453 * if its of a different type than its preferred cluster type (E/P). The bitmap should
454 * be updated every time a new thread is assigned to run on a processor. Cluster shared
455 * resource intensive threads are also not counted as foreign threads since these
456 * threads should not be rebalanced when running on non-preferred clusters.
457 *
458 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
459 * for rebalancing.
460 */
461 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)462 processor_state_update_running_foreign(processor_t processor, thread_t thread)
463 {
464 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
465 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
466
467 boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
468 boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
469 if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
470 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
471 } else {
472 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
473 }
474 }
475
476 /*
477 * Cluster shared resource intensive threads
478 *
479 * With the Edge scheduler, each pset maintains a bitmap of processors running
480 * threads that are shared resource intensive. This per-thread property is set
481 * by the performance controller or explicitly via dispatch SPIs. The bitmap
482 * allows the Edge scheduler to calculate the cluster shared resource load on
483 * any given cluster and load balance intensive threads accordingly.
484 */
485 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)486 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
487 {
488 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
489 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
490 } else {
491 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
492 }
493 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
494 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
495 } else {
496 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
497 }
498 }
499
500 #endif /* CONFIG_SCHED_EDGE */
501
/*
 * Reset the processor's "currently running" snapshot to idle defaults
 * (used when the processor has no thread, or before it picks one), and
 * refresh the pset load average.
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = false;
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* Idle cpus are not tracked by the Edge scheduler's running-bucket/shared-rsrc maps. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
	sched_update_pset_load_average(processor->processor_set, 0);
}
525
/*
 * Refresh the processor's "currently running" snapshot from 'thread'
 * as it goes on-core.  'pset_lock_held' indicates whether the caller
 * already holds the pset lock; the pset load average is only updated
 * in that case.
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
#if CONFIG_SCHED_SMT
	processor->current_is_NO_SMT = thread_no_smt(thread);
#endif /* CONFIG_SCHED_SMT */
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		sched_update_pset_load_average(processor->processor_set, 0);
	}
}
555
556 pset_node_t
pset_node_root(void)557 pset_node_root(void)
558 {
559 return &pset_node0;
560 }
561
/* NOTE(review): pset_create_lock appears to serialize pset creation; its use is not visible in this chunk. */
LCK_GRP_DECLARE(pset_create_grp, "pset_create");
LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);
564
565 processor_set_t
pset_create(pset_node_t node,pset_cluster_type_t pset_type,uint32_t pset_cluster_id,int pset_id)566 pset_create(
567 pset_node_t node,
568 pset_cluster_type_t pset_type,
569 uint32_t pset_cluster_id,
570 int pset_id)
571 {
572 /* some schedulers do not support multiple psets */
573 if (SCHED(multiple_psets_enabled) == FALSE) {
574 return processor_pset(master_processor);
575 }
576
577 processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
578
579 if (pset != PROCESSOR_SET_NULL) {
580 pset->pset_cluster_type = pset_type;
581 pset->pset_cluster_id = pset_cluster_id;
582 pset->pset_id = pset_id;
583 pset_init(pset, node);
584
585 lck_spin_lock(&pset_node_lock);
586
587 prev = &node->psets;
588 while (*prev != PROCESSOR_SET_NULL) {
589 prev = &(*prev)->pset_list;
590 }
591
592 *prev = pset;
593
594 lck_spin_unlock(&pset_node_lock);
595 }
596
597 return pset;
598 }
599
600 /*
601 * Find processor set with specified cluster_id.
602 * Returns default_pset if not found.
603 */
604 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)605 pset_find(
606 uint32_t cluster_id,
607 processor_set_t default_pset)
608 {
609 lck_spin_lock(&pset_node_lock);
610 pset_node_t node = &pset_node0;
611 processor_set_t pset = NULL;
612
613 do {
614 pset = node->psets;
615 while (pset != NULL) {
616 if (pset->pset_cluster_id == cluster_id) {
617 break;
618 }
619 pset = pset->pset_list;
620 }
621 } while (pset == NULL && (node = node->node_list) != NULL);
622 lck_spin_unlock(&pset_node_lock);
623 if (pset == NULL) {
624 return default_pset;
625 }
626 return pset;
627 }
628
629 /*
630 * Initialize the given processor_set structure.
631 */
/*
 * Initialize the given processor_set structure and attach it to
 * 'node'.  The fully-initialized pset is published in pset_array[]
 * with release ordering so lock-free readers see consistent contents.
 */
void
pset_init(
	processor_set_t pset,
	pset_node_t node)
{
	pset->online_processor_count = 0;
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
	bzero(&pset->pset_runnable_depth, sizeof(pset->pset_runnable_depth));
#else /* !CONFIG_SCHED_EDGE */
	pset->load_average = 0;
#endif /* CONFIG_SCHED_EDGE */
	pset->cpu_set_low = pset->cpu_set_hi = 0;
	pset->cpu_set_count = 0;
	pset->last_chosen = -1;
	/* All cpu membership/recommendation bitmaps start empty. */
	pset->cpu_bitmask = 0;
	pset->recommended_bitmask = 0;
#if CONFIG_SCHED_SMT
	pset->primary_map = 0;
#endif /* CONFIG_SCHED_SMT */
	pset->realtime_map = 0;
	pset->cpu_available_map = 0;

	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
		pset->cpu_state_map[i] = 0;
	}
	pset->pending_AST_URGENT_cpu_mask = 0;
	pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	pset->pending_deferred_AST_cpu_mask = 0;
#endif
	pset->pending_spill_cpu_mask = 0;
	pset->rt_pending_spill_cpu_mask = 0;
	pset_lock_init(pset);
	pset->pset_self = IP_NULL;
	pset->pset_name_self = IP_NULL;
	pset->pset_list = PROCESSOR_SET_NULL;
#if CONFIG_SCHED_SMT
	pset->is_SMT = false;
#endif /* CONFIG_SCHED_SMT */
#if CONFIG_SCHED_EDGE
	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
	pset->cpu_running_foreign = 0;
	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
	}
#endif /* CONFIG_SCHED_EDGE */

	/*
	 * No initial preferences or forced migrations, so use the least numbered
	 * available idle core when picking amongst idle cores in a cluster.
	 */
	pset->perfcontrol_cpu_preferred_bitmask = 0;
	pset->perfcontrol_cpu_migration_bitmask = 0;
	pset->cpu_preferred_last_chosen = -1;

	pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;

	if (pset != &pset0) {
		/*
		 * Scheduler runqueue initialization for non-boot psets.
		 * This initialization for pset0 happens in sched_init().
		 */
		SCHED(pset_init)(pset);
		SCHED(rt_init)(pset);
	}

	/*
	 * Because the pset_node_lock is not taken by every client of the pset_map,
	 * we need to make sure that the initialized pset contents are visible to any
	 * client that loads a non-NULL value from pset_array.
	 */
	os_atomic_store(&pset_array[pset->pset_id], pset, release);

	lck_spin_lock(&pset_node_lock);
	bit_set(node->pset_map, pset->pset_id);
	pset->node = node;
	lck_spin_unlock(&pset_node_lock);
}
712
713 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)714 processor_info_count(
715 processor_flavor_t flavor,
716 mach_msg_type_number_t *count)
717 {
718 switch (flavor) {
719 case PROCESSOR_BASIC_INFO:
720 *count = PROCESSOR_BASIC_INFO_COUNT;
721 break;
722
723 case PROCESSOR_CPU_LOAD_INFO:
724 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
725 break;
726
727 default:
728 return cpu_info_count(flavor, count);
729 }
730
731 return KERN_SUCCESS;
732 }
733
/*
 * Accumulate this processor's user/system/idle time into 'ticks',
 * converted from mach time to scheduler ticks (hz_tick_interval).
 * Values are added to the caller's array, not overwritten — callers
 * zero the slots first (see processor_info()).
 */
void
processor_cpu_load_info(processor_t processor,
    natural_t ticks[static CPU_STATE_MAX])
{
	struct recount_usage usage = { 0 };
	uint64_t idle_time = 0;
	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);

	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_metrics[RCT_LVL_USER].rm_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_SYSTEM] += (uint32_t)(
	    recount_usage_system_time_mach(&usage) / hz_tick_interval);
	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
}
748
/*
 * MIG-exported processor_info() handler.  Fills 'info' according to
 * 'flavor', sets *count to the number of elements written, and sets
 * *host to the real host on success.  Unknown flavors are delegated
 * to the machine-dependent cpu_info().  Returns KERN_FAILURE if the
 * caller's buffer (*count on entry) is too small.
 */
kern_return_t
processor_info(
	processor_t processor,
	processor_flavor_t flavor,
	host_t *host,
	processor_info_t info,
	mach_msg_type_number_t *count)
{
	int cpu_id, state;
	kern_return_t result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/*
		 * A cpu is reported "not running" when it is offline (or on its
		 * way offline) and the shutdown isn't temporary; on x86_64 a
		 * derecommended cpu is also reported not running.
		 */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* Zero first: processor_cpu_load_info() accumulates into these slots. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
832
833 /*
834 * Now that we're enforcing all CPUs actually boot, we may need a way to
835 * relax the timeout.
836 */
837 TUNABLE(uint32_t, cpu_boot_timeout_secs, "cpu_boot_timeout_secs", 1); /* seconds, default to 1 second */
838
839 static const char *
840 processor_start_panic_strings[] = {
841 [PROCESSOR_FIRST_BOOT] = "boot for the first time",
842 [PROCESSOR_BEFORE_ENTERING_SLEEP] = "come online while entering system sleep",
843 [PROCESSOR_WAKE_FROM_SLEEP] = "come online after returning from system sleep",
844 [PROCESSOR_CLUSTER_POWERDOWN_SUSPEND] = "come online while disabling cluster powerdown",
845 [PROCESSOR_CLUSTER_POWERDOWN_RESUME] = "come online before enabling cluster powerdown",
846 [PROCESSOR_POWERED_CORES_CHANGE] = "come online during dynamic cluster power state change",
847 };
848
/*
 * Block until 'processor' finishes its in-flight startup, panicking if
 * the cpu fails to come up within cpu_boot_timeout_secs (scaled by the
 * debug performance-degradation factor).  'start_kind' only selects the
 * panic message.  On return, the cpu is verified online and its offline
 * state advanced to PROCESSOR_OFFLINE_RUNNING.
 */
void
processor_wait_for_start(processor_t processor, processor_start_kind_t start_kind)
{
	if (!processor->processor_booted) {
		panic("processor_boot() missing for cpu %d", processor->cpu_id);
	}

	uint32_t boot_timeout_extended = cpu_boot_timeout_secs *
	    debug_cpu_performance_degradation_factor;

	spl_t s = splsched();
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	/*
	 * Wait for processor_instartup to clear; the lock is dropped
	 * around the block and re-taken to re-check the flag.
	 */
	while (processor->processor_instartup) {
		assert_wait_timeout((event_t)&processor->processor_instartup,
		    THREAD_UNINT, boot_timeout_extended, NSEC_PER_SEC);
		simple_unlock(&processor_start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("cpu %d failed to %s, waited %d seconds\n",
			    processor->cpu_id,
			    processor_start_panic_strings[start_kind],
			    boot_timeout_extended);
		}

		s = splsched();
		simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	}

	if (processor->processor_inshutdown) {
		panic("%s>cpu %d still in shutdown",
		    __func__, processor->cpu_id);
	}

	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	if (!processor->processor_online) {
		panic("%s>cpu %d not online",
		    __func__, processor->cpu_id);
	}

	/* First waiter advances the state; later waiters find it already RUNNING. */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);
}
902
/* Serializes processor start/shutdown paths (asserted held in processor_dostartup()). */
LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);
905
/*
 * Power on and start a processor that is currently offline.
 *
 * Caller must hold both cluster_powerdown_lock and processor_updown_lock
 * (asserted below).  On first boot (first_boot == true) the processor must
 * be in the NOT_BOOTED offline state; otherwise it must be FULLY_OFFLINE.
 *
 * This routine does not wait for the target CPU to finish coming up, so
 * multiple CPUs can be started in parallel; the wait-for-start happens
 * later (see the note before the CPU_ACTIVE broadcast below).
 */
static void
processor_dostartup(
	processor_t processor,
	bool first_boot)
{
	if (!processor->processor_booted && !first_boot) {
		panic("processor %d not booted", processor->cpu_id);
	}

	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	processor_set_t pset = processor->processor_set;

	assert(processor->processor_self);

	spl_t s = splsched();

	/* Flip shutdown -> startup flags so waiters can block on processor_instartup. */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_inshutdown || first_boot);
	processor->processor_inshutdown = false;
	assert(processor->processor_instartup == false);
	processor->processor_instartup = true;
	simple_unlock(&processor_start_state_lock);

	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	pset_lock(pset);

	if (first_boot) {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTING);

	assert(processor->state == PROCESSOR_OFF_LINE);

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	pset_unlock(pset);

	simple_unlock(&sched_available_cores_lock);

	splx(s);

	/* Power up the core and kick off the machine-dependent start sequence. */
	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);

	cpu_start(processor->cpu_id);

	s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	/*
	 * The target CPU may already have advanced the offline state itself
	 * (to STARTED_NOT_WAITED); only advance it here if it has not.
	 */
	if (processor->processor_offline_state == PROCESSOR_OFFLINE_STARTING) {
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_STARTED_NOT_RUNNING);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);

	ml_cpu_end_state_transition(processor->cpu_id);
	/*
	 * Note: Because the actual wait-for-start happens sometime later,
	 * this races with processor_up calling CPU_BOOTED.
	 * To fix that, this should happen after the first wait for start
	 * confirms the CPU has booted.
	 */
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
}
979
/*
 * Shut down a processor, recording the reason.
 *
 * Serialized against other start/stop operations by processor_updown_lock.
 * During system sleep only REASON_SYSTEM shutdowns are permitted, and the
 * boot processor may only be shut down for REASON_SYSTEM unless bootcpu
 * shutdown is supported.
 */
void
processor_exit_reason(processor_t processor, processor_reason_t reason, bool is_system_sleep)
{
	assert(processor);
	assert(processor->processor_set);

	lck_mtx_lock(&processor_updown_lock);

	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	assert((processor != master_processor) || (reason == REASON_SYSTEM) || support_bootcpu_shutdown);

	processor->last_shutdown_reason = reason;

	/* The boot processor's sleep is the final step of system sleep. */
	bool is_final_system_sleep = is_system_sleep && (processor == master_processor);

	processor_doshutdown(processor, is_final_system_sleep);

	lck_mtx_unlock(&processor_updown_lock);
}
1002
1003 /*
1004 * Called `processor_exit` in Unsupported KPI.
1005 * AppleARMCPU and AppleACPIPlatform call this in response to haltCPU().
1006 *
1007 * Behavior change: on both platforms, now xnu does the processor_sleep,
1008 * and ignores processor_exit calls from kexts.
1009 */
1010 kern_return_t
processor_exit_from_kext(__unused processor_t processor)1011 processor_exit_from_kext(
1012 __unused processor_t processor)
1013 {
1014 /* This is a no-op now. */
1015 return KERN_FAILURE;
1016 }
1017
/*
 * Put a processor to sleep as part of system sleep.
 * Caller must hold cluster_powerdown_lock (asserted).
 */
void
processor_sleep(
	processor_t processor)
{
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);

	processor_exit_reason(processor, REASON_SYSTEM, true);
}
1026
1027 kern_return_t
processor_exit_from_user(processor_t processor)1028 processor_exit_from_user(
1029 processor_t processor)
1030 {
1031 if (processor == PROCESSOR_NULL) {
1032 return KERN_INVALID_ARGUMENT;
1033 }
1034
1035 kern_return_t result;
1036
1037 lck_mtx_lock(&cluster_powerdown_lock);
1038
1039 result = sched_processor_exit_user(processor);
1040
1041 lck_mtx_unlock(&cluster_powerdown_lock);
1042
1043 return result;
1044 }
1045
/*
 * Restart a previously booted processor, recording the reason.
 *
 * Serialized against other start/stop operations by processor_updown_lock;
 * during system sleep only REASON_SYSTEM startups are permitted.
 */
void
processor_start_reason(processor_t processor, processor_reason_t reason)
{
	lck_mtx_lock(&processor_updown_lock);

	assert(processor);
	assert(processor->processor_set);
	assert(processor->processor_booted);

	if (sched_is_in_sleep()) {
		assert(reason == REASON_SYSTEM);
	}

	processor->last_startup_reason = reason;

	/* Not a first boot: processor_boot() handles that path. */
	processor_dostartup(processor, false);

	lck_mtx_unlock(&processor_updown_lock);
}
1065
1066 /*
1067 * Called `processor_start` in Unsupported KPI.
1068 * AppleARMCPU calls this to boot processors.
1069 * AppleACPIPlatform expects ml_processor_register to call processor_boot.
1070 *
1071 * Behavior change: now ml_processor_register also boots CPUs on ARM, and xnu
1072 * ignores processor_start calls from kexts.
1073 */
1074 kern_return_t
processor_start_from_kext(__unused processor_t processor)1075 processor_start_from_kext(
1076 __unused processor_t processor)
1077 {
1078 /* This is a no-op now. */
1079 return KERN_FAILURE;
1080 }
1081
1082 kern_return_t
processor_start_from_user(processor_t processor)1083 processor_start_from_user(
1084 processor_t processor)
1085 {
1086 if (processor == PROCESSOR_NULL) {
1087 return KERN_INVALID_ARGUMENT;
1088 }
1089
1090 kern_return_t result;
1091
1092 lck_mtx_lock(&cluster_powerdown_lock);
1093
1094 result = sched_processor_start_user(processor);
1095
1096 lck_mtx_unlock(&cluster_powerdown_lock);
1097
1098 return result;
1099 }
1100
1101 /*
1102 * Boot up a processor for the first time.
1103 *
1104 * This will also be called against the main processor during system boot,
1105 * even though it's already running.
1106 */
void
processor_boot(
	processor_t processor)
{
	lck_mtx_lock(&cluster_powerdown_lock);
	lck_mtx_lock(&processor_updown_lock);

	/* First boot happens before sleep and before CPU init is complete. */
	assert(!sched_is_in_sleep());
	assert(!sched_is_cpu_init_completed());

	if (processor->processor_booted) {
		panic("processor %d already booted", processor->cpu_id);
	}

	if (processor == master_processor) {
		/* The boot CPU is already running when this is called. */
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
	} else {
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_NOT_BOOTED);
	}

	/*
	 * Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		idle_thread_create(processor, processor_start_thread);
	}

	/* Create the processor's IPC port if it doesn't exist yet. */
	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	if (processor == master_processor) {
		processor->last_startup_reason = REASON_SYSTEM;

		ml_cpu_power_enable(processor->cpu_id);

		/* Bind to the boot CPU so cpu_start runs on it directly. */
		processor_t prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		cpu_start(processor->cpu_id);

		assert(processor->state == PROCESSOR_RUNNING);
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);

		thread_bind(prev);
	} else {
		processor->last_startup_reason = REASON_SYSTEM;

		/*
		 * We don't wait for startup to finish, so all CPUs can start
		 * in parallel.
		 */
		processor_dostartup(processor, true);
	}

	processor->processor_booted = true;

	lck_mtx_unlock(&processor_updown_lock);
	lck_mtx_unlock(&cluster_powerdown_lock);
}
1167
1168 /*
1169 * Wake a previously booted processor from a temporarily powered off state.
1170 */
void
processor_wake(
	processor_t processor)
{
	/* Caller must already hold the cluster powerdown lock. */
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);

	/* Only valid for processors that completed processor_boot(). */
	assert(processor->processor_booted);
	processor_start_reason(processor, REASON_SYSTEM);
}
1180
1181 #if CONFIG_SCHED_SMT
/*
 * Enable or disable all secondary SMT logical processors.
 *
 * Walks every logical CPU except cpu 0 and starts (enable) or exits
 * (disable) those that are not their own primary, then verifies via
 * host_info() that the observed logical CPU count matches the requested
 * configuration.
 *
 * Returns KERN_INVALID_ARGUMENT on non-SMT systems, an error from
 * host_info(), or KERN_FAILURE if the post-condition check fails.
 */
kern_return_t
enable_smt_processors(bool enable)
{
	if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
		/* Not an SMT system */
		return KERN_INVALID_ARGUMENT;
	}

	int ncpus = machine_info.logical_cpu_max;

	/*
	 * NOTE(review): per-CPU start/exit return values are ignored here;
	 * failures are only caught by the aggregate host_info() check below.
	 */
	for (int i = 1; i < ncpus; i++) {
		processor_t processor = processor_array[i];

		if (processor->processor_primary != processor) {
			if (enable) {
				processor_start_from_user(processor);
			} else { /* Disable */
				processor_exit_from_user(processor);
			}
		}
	}

#define BSD_HOST 1
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	if (kret != KERN_SUCCESS) {
		return kret;
	}

	/* After enabling, every logical CPU must be up... */
	if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
		return KERN_FAILURE;
	}

	/* ...after disabling, only one logical CPU per physical CPU. */
	if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
1222 #endif /* CONFIG_SCHED_SMT */
1223
1224 bool
processor_should_kprintf(processor_t processor,bool starting)1225 processor_should_kprintf(processor_t processor, bool starting)
1226 {
1227 processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1228
1229 return reason != REASON_CLPC_SYSTEM;
1230 }
1231
1232 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1233 processor_control(
1234 processor_t processor,
1235 processor_info_t info,
1236 mach_msg_type_number_t count)
1237 {
1238 if (processor == PROCESSOR_NULL) {
1239 return KERN_INVALID_ARGUMENT;
1240 }
1241
1242 return cpu_control(processor->cpu_id, info, count);
1243 }
1244
1245 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1246 processor_get_assignment(
1247 processor_t processor,
1248 processor_set_t *pset)
1249 {
1250 int state;
1251
1252 if (processor == PROCESSOR_NULL) {
1253 return KERN_INVALID_ARGUMENT;
1254 }
1255
1256 state = processor->state;
1257 if (state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1258 return KERN_FAILURE;
1259 }
1260
1261 *pset = &pset0;
1262
1263 return KERN_SUCCESS;
1264 }
1265
1266 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1267 processor_set_info(
1268 processor_set_t pset,
1269 int flavor,
1270 host_t *host,
1271 processor_set_info_t info,
1272 mach_msg_type_number_t *count)
1273 {
1274 if (pset == PROCESSOR_SET_NULL) {
1275 return KERN_INVALID_ARGUMENT;
1276 }
1277
1278 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1279 processor_set_basic_info_t basic_info;
1280
1281 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1282 return KERN_FAILURE;
1283 }
1284
1285 basic_info = (processor_set_basic_info_t) info;
1286 #if defined(__x86_64__)
1287 basic_info->processor_count = processor_avail_count_user;
1288 #else
1289 basic_info->processor_count = processor_avail_count;
1290 #endif
1291 basic_info->default_policy = POLICY_TIMESHARE;
1292
1293 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1294 *host = &realhost;
1295 return KERN_SUCCESS;
1296 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1297 policy_timeshare_base_t ts_base;
1298
1299 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1300 return KERN_FAILURE;
1301 }
1302
1303 ts_base = (policy_timeshare_base_t) info;
1304 ts_base->base_priority = BASEPRI_DEFAULT;
1305
1306 *count = POLICY_TIMESHARE_BASE_COUNT;
1307 *host = &realhost;
1308 return KERN_SUCCESS;
1309 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1310 policy_fifo_base_t fifo_base;
1311
1312 if (*count < POLICY_FIFO_BASE_COUNT) {
1313 return KERN_FAILURE;
1314 }
1315
1316 fifo_base = (policy_fifo_base_t) info;
1317 fifo_base->base_priority = BASEPRI_DEFAULT;
1318
1319 *count = POLICY_FIFO_BASE_COUNT;
1320 *host = &realhost;
1321 return KERN_SUCCESS;
1322 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1323 policy_rr_base_t rr_base;
1324
1325 if (*count < POLICY_RR_BASE_COUNT) {
1326 return KERN_FAILURE;
1327 }
1328
1329 rr_base = (policy_rr_base_t) info;
1330 rr_base->base_priority = BASEPRI_DEFAULT;
1331 rr_base->quantum = 1;
1332
1333 *count = POLICY_RR_BASE_COUNT;
1334 *host = &realhost;
1335 return KERN_SUCCESS;
1336 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1337 policy_timeshare_limit_t ts_limit;
1338
1339 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1340 return KERN_FAILURE;
1341 }
1342
1343 ts_limit = (policy_timeshare_limit_t) info;
1344 ts_limit->max_priority = MAXPRI_KERNEL;
1345
1346 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1347 *host = &realhost;
1348 return KERN_SUCCESS;
1349 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1350 policy_fifo_limit_t fifo_limit;
1351
1352 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1353 return KERN_FAILURE;
1354 }
1355
1356 fifo_limit = (policy_fifo_limit_t) info;
1357 fifo_limit->max_priority = MAXPRI_KERNEL;
1358
1359 *count = POLICY_FIFO_LIMIT_COUNT;
1360 *host = &realhost;
1361 return KERN_SUCCESS;
1362 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1363 policy_rr_limit_t rr_limit;
1364
1365 if (*count < POLICY_RR_LIMIT_COUNT) {
1366 return KERN_FAILURE;
1367 }
1368
1369 rr_limit = (policy_rr_limit_t) info;
1370 rr_limit->max_priority = MAXPRI_KERNEL;
1371
1372 *count = POLICY_RR_LIMIT_COUNT;
1373 *host = &realhost;
1374 return KERN_SUCCESS;
1375 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1376 int *enabled;
1377
1378 if (*count < (sizeof(*enabled) / sizeof(int))) {
1379 return KERN_FAILURE;
1380 }
1381
1382 enabled = (int *) info;
1383 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1384
1385 *count = sizeof(*enabled) / sizeof(int);
1386 *host = &realhost;
1387 return KERN_SUCCESS;
1388 }
1389
1390
1391 *host = HOST_NULL;
1392 return KERN_INVALID_ARGUMENT;
1393 }
1394
1395 /*
1396 * processor_set_statistics
1397 *
1398 * Returns scheduling statistics for a processor set.
1399 */
1400 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1401 processor_set_statistics(
1402 processor_set_t pset,
1403 int flavor,
1404 processor_set_info_t info,
1405 mach_msg_type_number_t *count)
1406 {
1407 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1408 return KERN_INVALID_PROCESSOR_SET;
1409 }
1410
1411 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1412 processor_set_load_info_t load_info;
1413
1414 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1415 return KERN_FAILURE;
1416 }
1417
1418 load_info = (processor_set_load_info_t) info;
1419
1420 load_info->mach_factor = sched_mach_factor;
1421 load_info->load_average = sched_load_average;
1422
1423 load_info->task_count = tasks_count;
1424 load_info->thread_count = threads_count;
1425
1426 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1427 return KERN_SUCCESS;
1428 }
1429
1430 return KERN_INVALID_ARGUMENT;
1431 }
1432
1433 /*
1434 * processor_set_things:
1435 *
1436 * Common internals for processor_set_{threads,tasks}
1437 */
/*
 * Build a referenced list of tasks or threads (selected by `type`) for
 * pset0, filtered by MACF policy when configured.  On success the caller
 * owns the returned array and each reference in it; on failure all
 * partially-taken references and allocations are released.
 */
static kern_return_t
processor_set_things(
	processor_set_t pset,
	mach_port_array_t *thing_list,
	mach_msg_type_number_t *countp,
	int type,
	mach_task_flavor_t flavor)
{
	unsigned int i;
	task_t task;
	thread_t thread;

	mach_port_array_t task_addr;
	task_t *task_list;
	vm_size_t actual_tasks, task_count_cur, task_count_needed;

	mach_port_array_t thread_addr;
	thread_t *thread_list;
	vm_size_t actual_threads, thread_count_cur, thread_count_needed;

	mach_port_array_t addr, newaddr;
	vm_size_t count, count_needed;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_ARGUMENT;
	}

	task_count_cur = 0;
	task_count_needed = 0;
	task_list = NULL;
	task_addr = NULL;
	actual_tasks = 0;

	thread_count_cur = 0;
	thread_count_needed = 0;
	thread_list = NULL;
	thread_addr = NULL;
	actual_threads = 0;

	/*
	 * Allocate-then-recheck loop: the global counts can change while
	 * the lock is dropped for allocation, so loop until the buffers
	 * are large enough while tasks_threads_lock is held.
	 */
	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		if (type == PSET_THING_THREAD) {
			thread_count_needed = threads_count;
		}
#if !CONFIG_MACF
		else
#endif
		/* with MACF, the task list is also needed to filter threads */
		task_count_needed = tasks_count;

		if (task_count_needed <= task_count_cur &&
		    thread_count_needed <= thread_count_cur) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (task_count_needed > task_count_cur) {
			mach_port_array_free(task_addr, task_count_cur);
			assert(task_count_needed > 0);
			task_count_cur = task_count_needed;

			task_addr = mach_port_array_alloc(task_count_cur,
			    Z_WAITOK | Z_ZERO);
			if (task_addr == NULL) {
				mach_port_array_free(thread_addr, thread_count_cur);
				return KERN_RESOURCE_SHORTAGE;
			}
			task_list = (task_t *)task_addr;
		}

		/* grow thread array */
		if (thread_count_needed > thread_count_cur) {
			mach_port_array_free(thread_addr, thread_count_cur);
			assert(thread_count_needed > 0);
			thread_count_cur = thread_count_needed;

			thread_addr = mach_port_array_alloc(thread_count_cur,
			    Z_WAITOK | Z_ZERO);
			if (thread_addr == NULL) {
				mach_port_array_free(task_addr, task_count_cur);
				return KERN_RESOURCE_SHORTAGE;
			}
			thread_list = (thread_t *)thread_addr;
		}
	}

	/* OK, have memory and the list locked */

	/* If we need it, get the thread list */
	if (type == PSET_THING_THREAD) {
		queue_iterate(&threads, thread, thread_t, threads) {
			task = get_threadtask(thread);
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip threads belonging to kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			thread_reference(thread);
			thread_list[actual_threads++] = thread;
		}
	}
#if !CONFIG_MACF
	else
#endif
	{
		/* get a list of the tasks */
		queue_iterate(&tasks, task, task_t, tasks) {
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			task_reference(task);
			task_list[actual_tasks++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

#if CONFIG_MACF
	unsigned int j, used;

	/* for each task, make sure we are allowed to examine it */
	for (i = used = 0; i < actual_tasks; i++) {
		if (mac_task_check_expose_task(task_list[i], flavor)) {
			task_deallocate(task_list[i]);
			continue;
		}
		task_list[used++] = task_list[i];
	}
	actual_tasks = used;
	task_count_needed = actual_tasks;

	if (type == PSET_THING_THREAD) {
		/* for each thread (if any), make sure it's task is in the allowed list */
		for (i = used = 0; i < actual_threads; i++) {
			boolean_t found_task = FALSE;

			task = get_threadtask(thread_list[i]);
			for (j = 0; j < actual_tasks; j++) {
				if (task_list[j] == task) {
					found_task = TRUE;
					break;
				}
			}
			if (found_task) {
				thread_list[used++] = thread_list[i];
			} else {
				thread_deallocate(thread_list[i]);
			}
		}
		actual_threads = used;
		thread_count_needed = actual_threads;

		/* done with the task list */
		for (i = 0; i < actual_tasks; i++) {
			task_deallocate(task_list[i]);
		}
		mach_port_array_free(task_addr, task_count_cur);
		task_list = NULL;
		task_count_cur = 0;
		actual_tasks = 0;
	}
#endif

	if (type == PSET_THING_THREAD) {
		if (actual_threads == 0) {
			/* no threads available to return */
			assert(task_count_cur == 0);
			mach_port_array_free(thread_addr, thread_count_cur);
			thread_list = NULL;
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_threads;
		count = thread_count_cur;
		addr = thread_addr;
	} else {
		if (actual_tasks == 0) {
			/* no tasks available to return */
			assert(thread_count_cur == 0);
			mach_port_array_free(task_addr, task_count_cur);
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_tasks;
		count = task_count_cur;
		addr = task_addr;
	}

	/* if we allocated too much, must copy */
	if (count_needed < count) {
		newaddr = mach_port_array_alloc(count_needed, Z_WAITOK | Z_ZERO);
		if (newaddr == NULL) {
			/*
			 * NOTE(review): when type == PSET_THING_THREAD,
			 * actual_tasks is 0 at this point, so this loop never
			 * runs and the thread references held in thread_list
			 * appear to leak on this failure path — verify whether
			 * the bound should be count_needed instead.
			 */
			for (i = 0; i < actual_tasks; i++) {
				if (type == PSET_THING_THREAD) {
					thread_deallocate(thread_list[i]);
				} else {
					task_deallocate(task_list[i]);
				}
			}
			mach_port_array_free(addr, count);
			return KERN_RESOURCE_SHORTAGE;
		}

		bcopy(addr, newaddr, count_needed * sizeof(void *));
		mach_port_array_free(addr, count);

		addr = newaddr;
		count = count_needed;
	}

	*thing_list = addr;
	*countp = (mach_msg_type_number_t)count;

	return KERN_SUCCESS;
}
1673
1674 /*
1675 * processor_set_tasks:
1676 *
1677 * List all tasks in the processor set.
1678 */
1679 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1680 processor_set_tasks_internal(
1681 processor_set_t pset,
1682 task_array_t *task_list,
1683 mach_msg_type_number_t *count,
1684 mach_task_flavor_t flavor)
1685 {
1686 kern_return_t ret;
1687
1688 ret = processor_set_things(pset, task_list, count, PSET_THING_TASK, flavor);
1689 if (ret != KERN_SUCCESS) {
1690 return ret;
1691 }
1692
1693 /* do the conversion that Mig should handle */
1694 convert_task_array_to_ports(*task_list, *count, flavor);
1695 return KERN_SUCCESS;
1696 }
1697
1698 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1699 processor_set_tasks(
1700 processor_set_t pset,
1701 task_array_t *task_list,
1702 mach_msg_type_number_t *count)
1703 {
1704 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1705 }
1706
1707 /*
1708 * processor_set_tasks_with_flavor:
1709 *
1710 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1711 */
1712 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1713 processor_set_tasks_with_flavor(
1714 processor_set_t pset,
1715 mach_task_flavor_t flavor,
1716 task_array_t *task_list,
1717 mach_msg_type_number_t *count)
1718 {
1719 switch (flavor) {
1720 case TASK_FLAVOR_CONTROL:
1721 case TASK_FLAVOR_READ:
1722 case TASK_FLAVOR_INSPECT:
1723 case TASK_FLAVOR_NAME:
1724 return processor_set_tasks_internal(pset, task_list, count, flavor);
1725 default:
1726 return KERN_INVALID_ARGUMENT;
1727 }
1728 }
1729
1730 /*
1731 * processor_set_threads:
1732 *
1733 * List all threads in the processor set.
1734 */
#if defined(SECURE_KERNEL)
/* Secure kernels never expose thread lists. */
kern_return_t
processor_set_threads(
	__unused processor_set_t pset,
	__unused thread_act_array_t *thread_list,
	__unused mach_msg_type_number_t *count)
{
	return KERN_FAILURE;
}
#elif !defined(XNU_TARGET_OS_OSX)
/* Not supported on non-macOS targets. */
kern_return_t
processor_set_threads(
	__unused processor_set_t pset,
	__unused thread_act_array_t *thread_list,
	__unused mach_msg_type_number_t *count)
{
	return KERN_NOT_SUPPORTED;
}
#else
/* macOS: return control ports for every thread in the pset. */
kern_return_t
processor_set_threads(
	processor_set_t pset,
	thread_act_array_t *thread_list,
	mach_msg_type_number_t *count)
{
	kern_return_t ret;

	ret = processor_set_things(pset, thread_list, count,
	    PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/* do the conversion that Mig should handle */
	convert_thread_array_to_ports(*thread_list, *count, TASK_FLAVOR_CONTROL);
	return KERN_SUCCESS;
}
#endif
1773
/*
 * Pick the recommended pset cluster type for a thread.
 */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
	/* Only used by the AMP scheduler policy */
#if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	/* Honor an explicit per-task pset hint when the override is enabled. */
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* A cluster-bound thread follows its bound cluster's type. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/*
	 * Background/utility priority bands go to the E cores unless the
	 * perfctl policy for that band says to follow the thread group.
	 */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise follow the thread group's cluster recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel threads to E, others to P. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
	(void)thread;
	return PSET_SMP;
#endif /* !CONFIG_THREAD_GROUPS || !__AMP__ || CONFIG_SCHED_EDGE */
}
1832
1833 #if __arm64__
1834
1835 pset_cluster_type_t
cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)1836 cluster_type_to_pset_cluster_type(cluster_type_t cluster_type)
1837 {
1838 switch (cluster_type) {
1839 #if __AMP__
1840 case CLUSTER_TYPE_E:
1841 return PSET_AMP_E;
1842 case CLUSTER_TYPE_P:
1843 return PSET_AMP_P;
1844 #endif /* __AMP__ */
1845 case CLUSTER_TYPE_SMP:
1846 return PSET_SMP;
1847 default:
1848 panic("Unexpected cluster type %d", cluster_type);
1849 }
1850 }
1851
1852 pset_node_t
cluster_type_to_pset_node(cluster_type_t cluster_type)1853 cluster_type_to_pset_node(cluster_type_t cluster_type)
1854 {
1855 switch (cluster_type) {
1856 #if __AMP__
1857 case CLUSTER_TYPE_E:
1858 return ecore_node;
1859 case CLUSTER_TYPE_P:
1860 return pcore_node;
1861 #endif /* __AMP__ */
1862 case CLUSTER_TYPE_SMP:
1863 return &pset_node0;
1864 default:
1865 panic("Unexpected cluster type %d", cluster_type);
1866 }
1867 }
1868
1869 #endif /* __arm64__ */
1870
1871 #if CONFIG_THREAD_GROUPS && __AMP__ && !CONFIG_SCHED_EDGE
1872
/*
 * Set whether a perfcontrol class follows its thread group's cluster
 * recommendation (inherit) or is restricted to the E cores.
 * Only PERFCONTROL_CLASS_UTILITY and PERFCONTROL_CLASS_BACKGROUND are
 * valid; any other class panics.
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

	/* Trace the policy change before applying it. */
	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	switch (perfctl_class) {
	case PERFCONTROL_CLASS_UTILITY:
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
		break;
	case PERFCONTROL_CLASS_BACKGROUND:
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
		break;
	default:
		panic("perfctl_class invalid");
		break;
	}
}
1892
1893 #elif defined(__arm64__)
1894
1895 /* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
	/* Intentionally empty: no AMP perfctl policy on this configuration. */
}
1900
1901 #endif /* defined(__arm64__) */
1902