1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84
85 #include <security/mac_mach_internal.h>
86
87 #if defined(CONFIG_XNUPOST)
88
89 #include <tests/xnupost.h>
90
91 #endif /* CONFIG_XNUPOST */
92
93 /*
94 * Exported interface
95 */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98 #include <san/kcov.h>
99
/*
 * The first pset and the pset_node are created by default for all platforms.
 * Those typically represent the boot-cluster. For AMP platforms, all clusters
 * of the same type are part of the same pset_node. This allows for easier
 * CPU selection logic.
 *
 * Both are set up by processor_bootstrap(): pset0 is hung off pset_node0 and
 * then initialized via pset_init().
 */
struct processor_set pset0;
struct pset_node pset_node0;

#if __AMP__
/*
 * Second pset node for AMP systems. processor_bootstrap() links it after
 * pset_node0 and points ecore_node / pcore_node at pset_node0 or pset_node1
 * depending on the cluster type of the boot cluster.
 */
struct pset_node pset_node1;
pset_node_t ecore_node;
pset_node_t pcore_node;
#endif
114
/*
 * Spin lock held by pset_create() and pset_find() while walking the per-node
 * pset lists, and by pset_init() while updating node->pset_map.
 */
LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);

LCK_GRP_DECLARE(pset_lck_grp, "pset");

/*
 * Global task and thread queues with their element counts. The queue heads
 * are initialized by processor_bootstrap().
 */
queue_head_t tasks;
queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */
queue_head_t corpse_tasks;
int tasks_count;
int terminated_tasks_count;
queue_head_t threads;
queue_head_t terminated_threads;
int threads_count;
int terminated_threads_count;
LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

/*
 * Singly-linked list of every processor passed to processor_init(), chained
 * through processor->processor_list. processor_init() appends at
 * processor_list_tail and bumps processor_count while holding
 * processor_list_lock.
 */
processor_t processor_list;
unsigned int processor_count;
static processor_t processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);

/* NOTE(review): these counters are not written in this part of the file — confirm their maintainers. */
uint32_t processor_avail_count;
uint32_t processor_avail_count_user;
uint32_t primary_processor_avail_count;
uint32_t primary_processor_avail_count_user;

/* cpu id that processor_bootstrap() hands to processor_init() for master_processor */
SECURITY_READ_ONLY_LATE(int) master_cpu = 0;

struct processor PERCPU_DATA(processor);
/* Indexed by cpu_id; slot filled in by processor_init() */
processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
/* Indexed by pset_id; slot published (release store) by pset_init() */
processor_set_t pset_array[MAX_PSETS] = { 0 };
148
/*
 * Expiry handlers for the per-processor running timers, indexed by running
 * timer ID. processor_init() passes entry i to timer_call_setup() for
 * processor->running_timers[i].
 */
static timer_call_func_t running_timer_funcs[] = {
	[RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
	[RUNNING_TIMER_PREEMPT] = thread_preempt_expire,
	[RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
/* Fails the build unless the table holds exactly RUNNING_TIMER_MAX entries */
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
156
#if defined(CONFIG_XNUPOST)
kern_return_t ipi_test(void);
extern void arm64_ipi_test(void);

/*
 *	ipi_test:
 *
 *	XNUPOST test entry point. On arm64 it walks processor_list and, for
 *	each processor, binds the calling thread to it, blocks, and then runs
 *	arm64_ipi_test(). Afterwards the thread is unbound again. On every
 *	other platform the test is reported as passed without doing anything.
 *
 *	Always returns KERN_SUCCESS.
 */
kern_return_t
ipi_test(void)
{
#if __arm64__
	processor_t cpu = processor_list;

	while (cpu != NULL) {
		/*
		 * Bind, then block: presumably the block is what lets the
		 * new binding take effect before the test body runs.
		 */
		thread_bind(cpu);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", cpu->cpu_id);
		arm64_ipi_test();

		cpu = cpu->processor_list;
	}

	/* unbind thread from specific cpu */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */
187
/*
 * Consulted by processor_startup(): when 0, a processor that is not its own
 * processor_primary is either not started at all (if cpu_can_exit() allows
 * it) or is started and then disabled at the scheduler level.
 */
int sched_enable_smt = 1;
189
190 void
processor_bootstrap(void)191 processor_bootstrap(void)
192 {
193 /* Initialize PSET node and PSET associated with boot cluster */
194 pset_node0.psets = &pset0;
195 pset_node0.pset_cluster_type = PSET_SMP;
196
197 #if __AMP__
198 const ml_topology_info_t *topology_info = ml_get_topology_info();
199
200 /*
201 * Since this is an AMP system, fill up cluster type and ID information; this should do the
202 * same kind of initialization done via ml_processor_register()
203 */
204 ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
205 pset0.pset_id = boot_cluster->cluster_id;
206 pset0.pset_cluster_id = boot_cluster->cluster_id;
207 if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
208 pset0.pset_cluster_type = PSET_AMP_E;
209 pset_node0.pset_cluster_type = PSET_AMP_E;
210 ecore_node = &pset_node0;
211
212 pset_node1.pset_cluster_type = PSET_AMP_P;
213 pcore_node = &pset_node1;
214 } else {
215 pset0.pset_cluster_type = PSET_AMP_P;
216 pset_node0.pset_cluster_type = PSET_AMP_P;
217 pcore_node = &pset_node0;
218
219 pset_node1.pset_cluster_type = PSET_AMP_E;
220 ecore_node = &pset_node1;
221 }
222
223 /* Link pset_node1 to pset_node0 */
224 pset_node0.node_list = &pset_node1;
225 #endif
226
227 pset_init(&pset0, &pset_node0);
228 queue_init(&tasks);
229 queue_init(&terminated_tasks);
230 queue_init(&threads);
231 queue_init(&terminated_threads);
232 queue_init(&corpse_tasks);
233
234 processor_init(master_processor, master_cpu, &pset0);
235 }
236
237 /*
238 * Initialize the given processor for the cpu
239 * indicated by cpu_id, and assign to the
240 * specified processor set.
241 */
242 void
processor_init(processor_t processor,int cpu_id,processor_set_t pset)243 processor_init(
244 processor_t processor,
245 int cpu_id,
246 processor_set_t pset)
247 {
248 spl_t s;
249
250 assert(cpu_id < MAX_SCHED_CPUS);
251 processor->cpu_id = cpu_id;
252
253 if (processor != master_processor) {
254 /* Scheduler state for master_processor initialized in sched_init() */
255 SCHED(processor_init)(processor);
256 smr_cpu_init(processor);
257 }
258
259 processor->state = PROCESSOR_OFF_LINE;
260 processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
261 processor->processor_set = pset;
262 processor_state_update_idle(processor);
263 processor->starting_pri = MINPRI;
264 processor->quantum_end = UINT64_MAX;
265 processor->deadline = UINT64_MAX;
266 processor->first_timeslice = FALSE;
267 processor->processor_offlined = false;
268 processor->processor_primary = processor; /* no SMT relationship known at this point */
269 processor->processor_secondary = NULL;
270 processor->is_SMT = false;
271 processor->is_recommended = true;
272 processor->processor_self = IP_NULL;
273 processor->processor_list = NULL;
274 processor->must_idle = false;
275 processor->next_idle_short = false;
276 processor->last_startup_reason = REASON_SYSTEM;
277 processor->last_shutdown_reason = REASON_NONE;
278 processor->shutdown_temporary = false;
279 processor->shutdown_locked = false;
280 processor->last_recommend_reason = REASON_SYSTEM;
281 processor->last_derecommend_reason = REASON_NONE;
282 processor->running_timers_active = false;
283 for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
284 timer_call_setup(&processor->running_timers[i],
285 running_timer_funcs[i], processor);
286 running_timer_clear(processor, i);
287 }
288 recount_processor_init(processor);
289 simple_lock_init(&processor->start_state_lock, 0);
290
291 s = splsched();
292 pset_lock(pset);
293 bit_set(pset->cpu_bitmask, cpu_id);
294 bit_set(pset->recommended_bitmask, cpu_id);
295 bit_set(pset->primary_map, cpu_id);
296 bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
297 if (pset->cpu_set_count++ == 0) {
298 pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
299 } else {
300 pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
301 pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
302 }
303 pset_unlock(pset);
304 splx(s);
305
306 simple_lock(&processor_list_lock, LCK_GRP_NULL);
307 if (processor_list == NULL) {
308 processor_list = processor;
309 } else {
310 processor_list_tail->processor_list = processor;
311 }
312 processor_list_tail = processor;
313 processor_count++;
314 simple_unlock(&processor_list_lock);
315 processor_array[cpu_id] = processor;
316 }
317
318 bool system_is_SMT = false;
319
320 void
processor_set_primary(processor_t processor,processor_t primary)321 processor_set_primary(
322 processor_t processor,
323 processor_t primary)
324 {
325 assert(processor->processor_primary == primary || processor->processor_primary == processor);
326 /* Re-adjust primary point for this (possibly) secondary processor */
327 processor->processor_primary = primary;
328
329 assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
330 if (primary != processor) {
331 /* Link primary to secondary, assumes a 2-way SMT model
332 * We'll need to move to a queue if any future architecture
333 * requires otherwise.
334 */
335 assert(processor->processor_secondary == NULL);
336 primary->processor_secondary = processor;
337 /* Mark both processors as SMT siblings */
338 primary->is_SMT = TRUE;
339 processor->is_SMT = TRUE;
340
341 if (!system_is_SMT) {
342 system_is_SMT = true;
343 sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
344 }
345
346 processor_set_t pset = processor->processor_set;
347 spl_t s = splsched();
348 pset_lock(pset);
349 if (!pset->is_SMT) {
350 pset->is_SMT = true;
351 }
352 bit_clear(pset->primary_map, processor->cpu_id);
353 pset_unlock(pset);
354 splx(s);
355 }
356 }
357
358 processor_set_t
processor_pset(processor_t processor)359 processor_pset(
360 processor_t processor)
361 {
362 return processor->processor_set;
363 }
364
365 #if CONFIG_SCHED_EDGE
366
367 cluster_type_t
pset_type_for_id(uint32_t cluster_id)368 pset_type_for_id(uint32_t cluster_id)
369 {
370 return pset_array[cluster_id]->pset_type;
371 }
372
373 /*
374 * Processor foreign threads
375 *
376 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
377 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
378 * if its of a different type than its preferred cluster type (E/P). The bitmap should
379 * be updated every time a new thread is assigned to run on a processor. Cluster shared
380 * resource intensive threads are also not counted as foreign threads since these
381 * threads should not be rebalanced when running on non-preferred clusters.
382 *
383 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
384 * for rebalancing.
385 */
386 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)387 processor_state_update_running_foreign(processor_t processor, thread_t thread)
388 {
389 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
390 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
391
392 boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
393 boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
394 if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
395 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
396 } else {
397 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
398 }
399 }
400
401 /*
402 * Cluster shared resource intensive threads
403 *
404 * With the Edge scheduler, each pset maintains a bitmap of processors running
405 * threads that are shared resource intensive. This per-thread property is set
406 * by the performance controller or explicitly via dispatch SPIs. The bitmap
407 * allows the Edge scheduler to calculate the cluster shared resource load on
408 * any given cluster and load balance intensive threads accordingly.
409 */
410 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)411 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
412 {
413 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
414 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
415 } else {
416 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
417 }
418 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
419 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
420 } else {
421 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
422 }
423 }
424
425 #endif /* CONFIG_SCHED_EDGE */
426
427 void
processor_state_update_idle(processor_t processor)428 processor_state_update_idle(processor_t processor)
429 {
430 processor->current_pri = IDLEPRI;
431 processor->current_sfi_class = SFI_CLASS_KERNEL;
432 processor->current_recommended_pset_type = PSET_SMP;
433 #if CONFIG_THREAD_GROUPS
434 processor->current_thread_group = NULL;
435 #endif
436 processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
437 processor->current_urgency = THREAD_URGENCY_NONE;
438 processor->current_is_NO_SMT = false;
439 processor->current_is_bound = false;
440 processor->current_is_eagerpreempt = false;
441 #if CONFIG_SCHED_EDGE
442 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
443 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
444 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
445 #endif /* CONFIG_SCHED_EDGE */
446 sched_update_pset_load_average(processor->processor_set, 0);
447 }
448
449 void
processor_state_update_from_thread(processor_t processor,thread_t thread,boolean_t pset_lock_held)450 processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
451 {
452 processor->current_pri = thread->sched_pri;
453 processor->current_sfi_class = thread->sfi_class;
454 processor->current_recommended_pset_type = recommended_pset_type(thread);
455 #if CONFIG_SCHED_EDGE
456 processor_state_update_running_foreign(processor, thread);
457 processor_state_update_running_cluster_shared_rsrc(processor, thread);
458 /* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
459 sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
460 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
461 #endif /* CONFIG_SCHED_EDGE */
462
463 #if CONFIG_THREAD_GROUPS
464 processor->current_thread_group = thread_group_get(thread);
465 #endif
466 processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
467 processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
468 processor->current_is_NO_SMT = thread_no_smt(thread);
469 processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
470 processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
471 if (pset_lock_held) {
472 /* Only update the pset load average when the pset lock is held */
473 sched_update_pset_load_average(processor->processor_set, 0);
474 }
475 }
476
477 void
processor_state_update_explicit(processor_t processor,int pri,sfi_class_id_t sfi_class,pset_cluster_type_t pset_type,perfcontrol_class_t perfctl_class,thread_urgency_t urgency,__unused sched_bucket_t bucket)478 processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
479 pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
480 {
481 processor->current_pri = pri;
482 processor->current_sfi_class = sfi_class;
483 processor->current_recommended_pset_type = pset_type;
484 processor->current_perfctl_class = perfctl_class;
485 processor->current_urgency = urgency;
486 #if CONFIG_SCHED_EDGE
487 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
488 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
489 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
490 #endif /* CONFIG_SCHED_EDGE */
491 }
492
493 pset_node_t
pset_node_root(void)494 pset_node_root(void)
495 {
496 return &pset_node0;
497 }
498
/*
 * Lock group and mutex named for pset creation.
 * NOTE(review): nothing in this part of the file takes pset_create_lock
 * (pset_create() itself uses pset_node_lock) - confirm where it is used.
 */
LCK_GRP_DECLARE(pset_create_grp, "pset_create");
LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);
501
502 processor_set_t
pset_create(pset_node_t node,pset_cluster_type_t pset_type,uint32_t pset_cluster_id,int pset_id)503 pset_create(
504 pset_node_t node,
505 pset_cluster_type_t pset_type,
506 uint32_t pset_cluster_id,
507 int pset_id)
508 {
509 /* some schedulers do not support multiple psets */
510 if (SCHED(multiple_psets_enabled) == FALSE) {
511 return processor_pset(master_processor);
512 }
513
514 processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
515
516 if (pset != PROCESSOR_SET_NULL) {
517 pset->pset_cluster_type = pset_type;
518 pset->pset_cluster_id = pset_cluster_id;
519 pset->pset_id = pset_id;
520 pset_init(pset, node);
521
522 lck_spin_lock(&pset_node_lock);
523
524 prev = &node->psets;
525 while (*prev != PROCESSOR_SET_NULL) {
526 prev = &(*prev)->pset_list;
527 }
528
529 *prev = pset;
530
531 lck_spin_unlock(&pset_node_lock);
532 }
533
534 return pset;
535 }
536
537 /*
538 * Find processor set with specified cluster_id.
539 * Returns default_pset if not found.
540 */
541 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)542 pset_find(
543 uint32_t cluster_id,
544 processor_set_t default_pset)
545 {
546 lck_spin_lock(&pset_node_lock);
547 pset_node_t node = &pset_node0;
548 processor_set_t pset = NULL;
549
550 do {
551 pset = node->psets;
552 while (pset != NULL) {
553 if (pset->pset_cluster_id == cluster_id) {
554 break;
555 }
556 pset = pset->pset_list;
557 }
558 } while (pset == NULL && (node = node->node_list) != NULL);
559 lck_spin_unlock(&pset_node_lock);
560 if (pset == NULL) {
561 return default_pset;
562 }
563 return pset;
564 }
565
566 /*
567 * Initialize the given processor_set structure.
568 */
569 void
pset_init(processor_set_t pset,pset_node_t node)570 pset_init(
571 processor_set_t pset,
572 pset_node_t node)
573 {
574 pset->online_processor_count = 0;
575 pset->load_average = 0;
576 bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
577 pset->cpu_set_low = pset->cpu_set_hi = 0;
578 pset->cpu_set_count = 0;
579 pset->last_chosen = -1;
580 pset->cpu_bitmask = 0;
581 pset->recommended_bitmask = 0;
582 pset->primary_map = 0;
583 pset->realtime_map = 0;
584 pset->cpu_available_map = 0;
585
586 for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
587 pset->cpu_state_map[i] = 0;
588 }
589 pset->pending_AST_URGENT_cpu_mask = 0;
590 pset->pending_AST_PREEMPT_cpu_mask = 0;
591 #if defined(CONFIG_SCHED_DEFERRED_AST)
592 pset->pending_deferred_AST_cpu_mask = 0;
593 #endif
594 pset->pending_spill_cpu_mask = 0;
595 pset->rt_pending_spill_cpu_mask = 0;
596 pset_lock_init(pset);
597 pset->pset_self = IP_NULL;
598 pset->pset_name_self = IP_NULL;
599 pset->pset_list = PROCESSOR_SET_NULL;
600 pset->is_SMT = false;
601 #if CONFIG_SCHED_EDGE
602 bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
603 pset->cpu_running_foreign = 0;
604 for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
605 pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
606 pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
607 }
608 #endif /* CONFIG_SCHED_EDGE */
609
610 /*
611 * No initial preferences or forced migrations, so use the least numbered
612 * available idle core when picking amongst idle cores in a cluster.
613 */
614 pset->perfcontrol_cpu_preferred_bitmask = 0;
615 pset->perfcontrol_cpu_migration_bitmask = 0;
616 pset->cpu_preferred_last_chosen = -1;
617
618 pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;
619
620 if (pset != &pset0) {
621 /*
622 * Scheduler runqueue initialization for non-boot psets.
623 * This initialization for pset0 happens in sched_init().
624 */
625 SCHED(pset_init)(pset);
626 SCHED(rt_init)(pset);
627 }
628
629 /*
630 * Because the pset_node_lock is not taken by every client of the pset_map,
631 * we need to make sure that the initialized pset contents are visible to any
632 * client that loads a non-NULL value from pset_array.
633 */
634 os_atomic_store(&pset_array[pset->pset_id], pset, release);
635
636 lck_spin_lock(&pset_node_lock);
637 bit_set(node->pset_map, pset->pset_id);
638 pset->node = node;
639 lck_spin_unlock(&pset_node_lock);
640 }
641
642 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)643 processor_info_count(
644 processor_flavor_t flavor,
645 mach_msg_type_number_t *count)
646 {
647 switch (flavor) {
648 case PROCESSOR_BASIC_INFO:
649 *count = PROCESSOR_BASIC_INFO_COUNT;
650 break;
651
652 case PROCESSOR_CPU_LOAD_INFO:
653 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
654 break;
655
656 default:
657 return cpu_info_count(flavor, count);
658 }
659
660 return KERN_SUCCESS;
661 }
662
663 void
processor_cpu_load_info(processor_t processor,natural_t ticks[static CPU_STATE_MAX])664 processor_cpu_load_info(processor_t processor,
665 natural_t ticks[static CPU_STATE_MAX])
666 {
667 struct recount_usage usage = { 0 };
668 uint64_t idle_time = 0;
669 recount_processor_usage(&processor->pr_recount, &usage, &idle_time);
670
671 ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_metrics[RCT_LVL_USER].rm_time_mach /
672 hz_tick_interval);
673 ticks[CPU_STATE_SYSTEM] += (uint32_t)(
674 recount_usage_system_time_mach(&usage) / hz_tick_interval);
675 ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
676 }
677
678 kern_return_t
processor_info(processor_t processor,processor_flavor_t flavor,host_t * host,processor_info_t info,mach_msg_type_number_t * count)679 processor_info(
680 processor_t processor,
681 processor_flavor_t flavor,
682 host_t *host,
683 processor_info_t info,
684 mach_msg_type_number_t *count)
685 {
686 int cpu_id, state;
687 kern_return_t result;
688
689 if (processor == PROCESSOR_NULL) {
690 return KERN_INVALID_ARGUMENT;
691 }
692
693 cpu_id = processor->cpu_id;
694
695 switch (flavor) {
696 case PROCESSOR_BASIC_INFO:
697 {
698 processor_basic_info_t basic_info;
699
700 if (*count < PROCESSOR_BASIC_INFO_COUNT) {
701 return KERN_FAILURE;
702 }
703
704 basic_info = (processor_basic_info_t) info;
705 basic_info->cpu_type = slot_type(cpu_id);
706 basic_info->cpu_subtype = slot_subtype(cpu_id);
707 state = processor->state;
708 if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
709 #if defined(__x86_64__)
710 || !processor->is_recommended
711 #endif
712 ) {
713 basic_info->running = FALSE;
714 } else {
715 basic_info->running = TRUE;
716 }
717 basic_info->slot_num = cpu_id;
718 if (processor == master_processor) {
719 basic_info->is_master = TRUE;
720 } else {
721 basic_info->is_master = FALSE;
722 }
723
724 *count = PROCESSOR_BASIC_INFO_COUNT;
725 *host = &realhost;
726
727 return KERN_SUCCESS;
728 }
729
730 case PROCESSOR_CPU_LOAD_INFO:
731 {
732 processor_cpu_load_info_t cpu_load_info;
733
734 if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
735 return KERN_FAILURE;
736 }
737
738 cpu_load_info = (processor_cpu_load_info_t) info;
739
740 cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
741 cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
742 cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
743 processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
744 cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
745
746 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
747 *host = &realhost;
748
749 return KERN_SUCCESS;
750 }
751
752 default:
753 result = cpu_info(flavor, cpu_id, info, count);
754 if (result == KERN_SUCCESS) {
755 *host = &realhost;
756 }
757
758 return result;
759 }
760 }
761
762 void
processor_wait_for_start(processor_t processor)763 processor_wait_for_start(processor_t processor)
764 {
765 spl_t s = splsched();
766 simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
767 while (processor->state == PROCESSOR_START) {
768 assert_wait_timeout((event_t)&processor->state, THREAD_UNINT, 1000, 1000 * NSEC_PER_USEC); /* 1 second */
769 simple_unlock(&processor->start_state_lock);
770 splx(s);
771
772 wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
773 if (wait_result == THREAD_TIMED_OUT) {
774 panic("%s>cpu %d failed to start\n", __FUNCTION__, processor->cpu_id);
775 }
776
777 s = splsched();
778 simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
779 }
780 simple_unlock(&processor->start_state_lock);
781 splx(s);
782 }
783
784 LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
785 LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);
786
787 static kern_return_t
processor_startup(processor_t processor,processor_reason_t reason,uint32_t flags)788 processor_startup(
789 processor_t processor,
790 processor_reason_t reason,
791 uint32_t flags)
792 {
793 processor_set_t pset;
794 thread_t thread;
795 kern_return_t result;
796 spl_t s;
797
798 if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
799 return KERN_INVALID_ARGUMENT;
800 }
801
802 if ((flags & (LOCK_STATE | UNLOCK_STATE)) && (reason != REASON_SYSTEM)) {
803 return KERN_INVALID_ARGUMENT;
804 }
805
806 lck_mtx_lock(&processor_updown_lock);
807
808 if (processor == master_processor) {
809 processor_t prev;
810
811 processor->last_startup_reason = reason;
812
813 ml_cpu_power_enable(processor->cpu_id);
814
815 prev = thread_bind(processor);
816 thread_block(THREAD_CONTINUE_NULL);
817
818 result = cpu_start(processor->cpu_id);
819
820 thread_bind(prev);
821
822 lck_mtx_unlock(&processor_updown_lock);
823 return result;
824 }
825
826 bool scheduler_disable = false;
827
828 if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
829 if (cpu_can_exit(processor->cpu_id)) {
830 lck_mtx_unlock(&processor_updown_lock);
831 return KERN_SUCCESS;
832 }
833 /*
834 * This secondary SMT processor must start in order to service interrupts,
835 * so instead it will be disabled at the scheduler level.
836 */
837 scheduler_disable = true;
838 }
839
840 s = splsched();
841 pset = processor->processor_set;
842 pset_lock(pset);
843 if (flags & LOCK_STATE) {
844 processor->shutdown_locked = true;
845 } else if (flags & UNLOCK_STATE) {
846 processor->shutdown_locked = false;
847 }
848
849 if (processor->state == PROCESSOR_START) {
850 pset_unlock(pset);
851 splx(s);
852
853 processor_wait_for_start(processor);
854
855 lck_mtx_unlock(&processor_updown_lock);
856 return KERN_SUCCESS;
857 }
858
859 if ((processor->state != PROCESSOR_OFF_LINE) || ((flags & SHUTDOWN_TEMPORARY) && !processor->shutdown_temporary)) {
860 pset_unlock(pset);
861 splx(s);
862
863 lck_mtx_unlock(&processor_updown_lock);
864 return KERN_FAILURE;
865 }
866
867 pset_update_processor_state(pset, processor, PROCESSOR_START);
868 processor->last_startup_reason = reason;
869 pset_unlock(pset);
870 splx(s);
871
872 /*
873 * Create the idle processor thread.
874 */
875 if (processor->idle_thread == THREAD_NULL) {
876 result = idle_thread_create(processor);
877 if (result != KERN_SUCCESS) {
878 s = splsched();
879 pset_lock(pset);
880 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
881 pset_unlock(pset);
882 splx(s);
883
884 lck_mtx_unlock(&processor_updown_lock);
885 return result;
886 }
887 }
888
889 /*
890 * If there is no active thread, the processor
891 * has never been started. Create a dedicated
892 * start up thread.
893 */
894 if (processor->active_thread == THREAD_NULL &&
895 processor->startup_thread == THREAD_NULL) {
896 result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
897 if (result != KERN_SUCCESS) {
898 s = splsched();
899 pset_lock(pset);
900 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
901 pset_unlock(pset);
902 splx(s);
903
904 lck_mtx_unlock(&processor_updown_lock);
905 return result;
906 }
907
908 s = splsched();
909 thread_lock(thread);
910 thread->bound_processor = processor;
911 processor->startup_thread = thread;
912 thread->state = TH_RUN;
913 thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
914 thread_unlock(thread);
915 splx(s);
916
917 thread_deallocate(thread);
918 }
919
920 if (processor->processor_self == IP_NULL) {
921 ipc_processor_init(processor);
922 }
923
924 ml_cpu_power_enable(processor->cpu_id);
925 ml_cpu_begin_state_transition(processor->cpu_id);
926 ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
927 result = cpu_start(processor->cpu_id);
928 #if defined (__arm__) || defined (__arm64__)
929 assert(result == KERN_SUCCESS);
930 #else
931 if (result != KERN_SUCCESS) {
932 s = splsched();
933 pset_lock(pset);
934 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
935 pset_unlock(pset);
936 splx(s);
937 ml_cpu_end_state_transition(processor->cpu_id);
938
939 lck_mtx_unlock(&processor_updown_lock);
940 return result;
941 }
942 #endif
943 if (scheduler_disable) {
944 assert(processor->processor_primary != processor);
945 sched_processor_enable(processor, FALSE);
946 }
947
948 if (flags & WAIT_FOR_START) {
949 processor_wait_for_start(processor);
950 }
951
952 ml_cpu_end_state_transition(processor->cpu_id);
953 ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
954
955 #if CONFIG_KCOV
956 kcov_start_cpu(processor->cpu_id);
957 #endif
958
959 lck_mtx_unlock(&processor_updown_lock);
960 return KERN_SUCCESS;
961 }
962
963 kern_return_t
processor_exit_reason(processor_t processor,processor_reason_t reason,uint32_t flags)964 processor_exit_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
965 {
966 if (processor == PROCESSOR_NULL) {
967 return KERN_INVALID_ARGUMENT;
968 }
969
970 if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
971 #ifdef RHODES_CLUSTER_POWERDOWN_WORKAROUND
972 /*
973 * Must allow CLPC to finish powering down the whole cluster,
974 * or IOCPUSleepKernel() will fail to restart the offline cpus.
975 */
976 if (reason != REASON_CLPC_SYSTEM) {
977 return KERN_FAILURE;
978 }
979 #else
980 return KERN_FAILURE;
981 #endif
982 }
983
984 if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
985 return sched_processor_enable(processor, FALSE);
986 } else if ((reason == REASON_SYSTEM) || cpu_can_exit(processor->cpu_id)) {
987 return processor_shutdown(processor, reason, flags);
988 }
989
990 return KERN_INVALID_ARGUMENT;
991 }
992
993 kern_return_t
processor_exit(processor_t processor)994 processor_exit(
995 processor_t processor)
996 {
997 return processor_exit_reason(processor, REASON_SYSTEM, 0);
998 }
999
1000 kern_return_t
processor_exit_from_user(processor_t processor)1001 processor_exit_from_user(
1002 processor_t processor)
1003 {
1004 return processor_exit_reason(processor, REASON_USER, 0);
1005 }
1006
1007 kern_return_t
processor_start_reason(processor_t processor,processor_reason_t reason,uint32_t flags)1008 processor_start_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
1009 {
1010 if (processor == PROCESSOR_NULL) {
1011 return KERN_INVALID_ARGUMENT;
1012 }
1013
1014 if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
1015 return KERN_FAILURE;
1016 }
1017
1018 if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
1019 return sched_processor_enable(processor, TRUE);
1020 } else {
1021 return processor_startup(processor, reason, flags);
1022 }
1023 }
1024
1025 kern_return_t
processor_start(processor_t processor)1026 processor_start(
1027 processor_t processor)
1028 {
1029 return processor_start_reason(processor, REASON_SYSTEM, 0);
1030 }
1031
1032 kern_return_t
processor_start_from_user(processor_t processor)1033 processor_start_from_user(
1034 processor_t processor)
1035 {
1036 return processor_start_reason(processor, REASON_USER, 0);
1037 }
1038
1039 kern_return_t
enable_smt_processors(bool enable)1040 enable_smt_processors(bool enable)
1041 {
1042 if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
1043 /* Not an SMT system */
1044 return KERN_INVALID_ARGUMENT;
1045 }
1046
1047 int ncpus = machine_info.logical_cpu_max;
1048
1049 for (int i = 1; i < ncpus; i++) {
1050 processor_t processor = processor_array[i];
1051
1052 if (processor->processor_primary != processor) {
1053 if (enable) {
1054 processor_start_from_user(processor);
1055 } else { /* Disable */
1056 processor_exit_from_user(processor);
1057 }
1058 }
1059 }
1060
1061 #define BSD_HOST 1
1062 host_basic_info_data_t hinfo;
1063 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
1064 kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
1065 if (kret != KERN_SUCCESS) {
1066 return kret;
1067 }
1068
1069 if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
1070 return KERN_FAILURE;
1071 }
1072
1073 if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
1074 return KERN_FAILURE;
1075 }
1076
1077 return KERN_SUCCESS;
1078 }
1079
1080 bool
processor_should_kprintf(processor_t processor,bool starting)1081 processor_should_kprintf(processor_t processor, bool starting)
1082 {
1083 processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1084
1085 return reason != REASON_CLPC_SYSTEM;
1086 }
1087
1088 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1089 processor_control(
1090 processor_t processor,
1091 processor_info_t info,
1092 mach_msg_type_number_t count)
1093 {
1094 if (processor == PROCESSOR_NULL) {
1095 return KERN_INVALID_ARGUMENT;
1096 }
1097
1098 return cpu_control(processor->cpu_id, info, count);
1099 }
1100
1101 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1102 processor_get_assignment(
1103 processor_t processor,
1104 processor_set_t *pset)
1105 {
1106 int state;
1107
1108 if (processor == PROCESSOR_NULL) {
1109 return KERN_INVALID_ARGUMENT;
1110 }
1111
1112 state = processor->state;
1113 if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1114 return KERN_FAILURE;
1115 }
1116
1117 *pset = &pset0;
1118
1119 return KERN_SUCCESS;
1120 }
1121
1122 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1123 processor_set_info(
1124 processor_set_t pset,
1125 int flavor,
1126 host_t *host,
1127 processor_set_info_t info,
1128 mach_msg_type_number_t *count)
1129 {
1130 if (pset == PROCESSOR_SET_NULL) {
1131 return KERN_INVALID_ARGUMENT;
1132 }
1133
1134 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1135 processor_set_basic_info_t basic_info;
1136
1137 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1138 return KERN_FAILURE;
1139 }
1140
1141 basic_info = (processor_set_basic_info_t) info;
1142 #if defined(__x86_64__)
1143 basic_info->processor_count = processor_avail_count_user;
1144 #else
1145 basic_info->processor_count = processor_avail_count;
1146 #endif
1147 basic_info->default_policy = POLICY_TIMESHARE;
1148
1149 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1150 *host = &realhost;
1151 return KERN_SUCCESS;
1152 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1153 policy_timeshare_base_t ts_base;
1154
1155 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1156 return KERN_FAILURE;
1157 }
1158
1159 ts_base = (policy_timeshare_base_t) info;
1160 ts_base->base_priority = BASEPRI_DEFAULT;
1161
1162 *count = POLICY_TIMESHARE_BASE_COUNT;
1163 *host = &realhost;
1164 return KERN_SUCCESS;
1165 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1166 policy_fifo_base_t fifo_base;
1167
1168 if (*count < POLICY_FIFO_BASE_COUNT) {
1169 return KERN_FAILURE;
1170 }
1171
1172 fifo_base = (policy_fifo_base_t) info;
1173 fifo_base->base_priority = BASEPRI_DEFAULT;
1174
1175 *count = POLICY_FIFO_BASE_COUNT;
1176 *host = &realhost;
1177 return KERN_SUCCESS;
1178 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1179 policy_rr_base_t rr_base;
1180
1181 if (*count < POLICY_RR_BASE_COUNT) {
1182 return KERN_FAILURE;
1183 }
1184
1185 rr_base = (policy_rr_base_t) info;
1186 rr_base->base_priority = BASEPRI_DEFAULT;
1187 rr_base->quantum = 1;
1188
1189 *count = POLICY_RR_BASE_COUNT;
1190 *host = &realhost;
1191 return KERN_SUCCESS;
1192 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1193 policy_timeshare_limit_t ts_limit;
1194
1195 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1196 return KERN_FAILURE;
1197 }
1198
1199 ts_limit = (policy_timeshare_limit_t) info;
1200 ts_limit->max_priority = MAXPRI_KERNEL;
1201
1202 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1203 *host = &realhost;
1204 return KERN_SUCCESS;
1205 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1206 policy_fifo_limit_t fifo_limit;
1207
1208 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1209 return KERN_FAILURE;
1210 }
1211
1212 fifo_limit = (policy_fifo_limit_t) info;
1213 fifo_limit->max_priority = MAXPRI_KERNEL;
1214
1215 *count = POLICY_FIFO_LIMIT_COUNT;
1216 *host = &realhost;
1217 return KERN_SUCCESS;
1218 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1219 policy_rr_limit_t rr_limit;
1220
1221 if (*count < POLICY_RR_LIMIT_COUNT) {
1222 return KERN_FAILURE;
1223 }
1224
1225 rr_limit = (policy_rr_limit_t) info;
1226 rr_limit->max_priority = MAXPRI_KERNEL;
1227
1228 *count = POLICY_RR_LIMIT_COUNT;
1229 *host = &realhost;
1230 return KERN_SUCCESS;
1231 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1232 int *enabled;
1233
1234 if (*count < (sizeof(*enabled) / sizeof(int))) {
1235 return KERN_FAILURE;
1236 }
1237
1238 enabled = (int *) info;
1239 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1240
1241 *count = sizeof(*enabled) / sizeof(int);
1242 *host = &realhost;
1243 return KERN_SUCCESS;
1244 }
1245
1246
1247 *host = HOST_NULL;
1248 return KERN_INVALID_ARGUMENT;
1249 }
1250
1251 /*
1252 * processor_set_statistics
1253 *
1254 * Returns scheduling statistics for a processor set.
1255 */
1256 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1257 processor_set_statistics(
1258 processor_set_t pset,
1259 int flavor,
1260 processor_set_info_t info,
1261 mach_msg_type_number_t *count)
1262 {
1263 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1264 return KERN_INVALID_PROCESSOR_SET;
1265 }
1266
1267 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1268 processor_set_load_info_t load_info;
1269
1270 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1271 return KERN_FAILURE;
1272 }
1273
1274 load_info = (processor_set_load_info_t) info;
1275
1276 load_info->mach_factor = sched_mach_factor;
1277 load_info->load_average = sched_load_average;
1278
1279 load_info->task_count = tasks_count;
1280 load_info->thread_count = threads_count;
1281
1282 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1283 return KERN_SUCCESS;
1284 }
1285
1286 return KERN_INVALID_ARGUMENT;
1287 }
1288
1289 /*
1290 * processor_set_things:
1291 *
1292 * Common internals for processor_set_{threads,tasks}
1293 */
1294 static kern_return_t
processor_set_things(processor_set_t pset,mach_port_array_t * thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1295 processor_set_things(
1296 processor_set_t pset,
1297 mach_port_array_t *thing_list,
1298 mach_msg_type_number_t *countp,
1299 int type,
1300 mach_task_flavor_t flavor)
1301 {
1302 unsigned int i;
1303 task_t task;
1304 thread_t thread;
1305
1306 mach_port_array_t task_addr;
1307 task_t *task_list;
1308 vm_size_t actual_tasks, task_count_cur, task_count_needed;
1309
1310 mach_port_array_t thread_addr;
1311 thread_t *thread_list;
1312 vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1313
1314 mach_port_array_t addr, newaddr;
1315 vm_size_t count, count_needed;
1316
1317 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1318 return KERN_INVALID_ARGUMENT;
1319 }
1320
1321 task_count_cur = 0;
1322 task_count_needed = 0;
1323 task_list = NULL;
1324 task_addr = NULL;
1325 actual_tasks = 0;
1326
1327 thread_count_cur = 0;
1328 thread_count_needed = 0;
1329 thread_list = NULL;
1330 thread_addr = NULL;
1331 actual_threads = 0;
1332
1333 for (;;) {
1334 lck_mtx_lock(&tasks_threads_lock);
1335
1336 /* do we have the memory we need? */
1337 if (type == PSET_THING_THREAD) {
1338 thread_count_needed = threads_count;
1339 }
1340 #if !CONFIG_MACF
1341 else
1342 #endif
1343 task_count_needed = tasks_count;
1344
1345 if (task_count_needed <= task_count_cur &&
1346 thread_count_needed <= thread_count_cur) {
1347 break;
1348 }
1349
1350 /* unlock and allocate more memory */
1351 lck_mtx_unlock(&tasks_threads_lock);
1352
1353 /* grow task array */
1354 if (task_count_needed > task_count_cur) {
1355 mach_port_array_free(task_addr, task_count_cur);
1356 assert(task_count_needed > 0);
1357 task_count_cur = task_count_needed;
1358
1359 task_addr = mach_port_array_alloc(task_count_cur,
1360 Z_WAITOK | Z_ZERO);
1361 if (task_addr == NULL) {
1362 mach_port_array_free(thread_addr, thread_count_cur);
1363 return KERN_RESOURCE_SHORTAGE;
1364 }
1365 task_list = (task_t *)task_addr;
1366 }
1367
1368 /* grow thread array */
1369 if (thread_count_needed > thread_count_cur) {
1370 mach_port_array_free(thread_addr, thread_count_cur);
1371 assert(thread_count_needed > 0);
1372 thread_count_cur = thread_count_needed;
1373
1374 thread_addr = mach_port_array_alloc(thread_count_cur,
1375 Z_WAITOK | Z_ZERO);
1376 if (thread_addr == NULL) {
1377 mach_port_array_free(task_addr, task_count_cur);
1378 return KERN_RESOURCE_SHORTAGE;
1379 }
1380 thread_list = (thread_t *)thread_addr;
1381 }
1382 }
1383
1384 /* OK, have memory and the list locked */
1385
1386 /* If we need it, get the thread list */
1387 if (type == PSET_THING_THREAD) {
1388 queue_iterate(&threads, thread, thread_t, threads) {
1389 task = get_threadtask(thread);
1390 #if defined(SECURE_KERNEL)
1391 if (task == kernel_task) {
1392 /* skip threads belonging to kernel_task */
1393 continue;
1394 }
1395 #endif
1396 if (!task->ipc_active || task_is_exec_copy(task)) {
1397 /* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
1398 continue;
1399 }
1400
1401 thread_reference(thread);
1402 thread_list[actual_threads++] = thread;
1403 }
1404 }
1405 #if !CONFIG_MACF
1406 else
1407 #endif
1408 {
1409 /* get a list of the tasks */
1410 queue_iterate(&tasks, task, task_t, tasks) {
1411 #if defined(SECURE_KERNEL)
1412 if (task == kernel_task) {
1413 /* skip kernel_task */
1414 continue;
1415 }
1416 #endif
1417 if (!task->ipc_active || task_is_exec_copy(task)) {
1418 /* skip inactive tasks (in the middle of exec/fork/spawn) */
1419 continue;
1420 }
1421
1422 task_reference(task);
1423 task_list[actual_tasks++] = task;
1424 }
1425 }
1426
1427 lck_mtx_unlock(&tasks_threads_lock);
1428
1429 #if CONFIG_MACF
1430 unsigned int j, used;
1431
1432 /* for each task, make sure we are allowed to examine it */
1433 for (i = used = 0; i < actual_tasks; i++) {
1434 if (mac_task_check_expose_task(task_list[i], flavor)) {
1435 task_deallocate(task_list[i]);
1436 continue;
1437 }
1438 task_list[used++] = task_list[i];
1439 }
1440 actual_tasks = used;
1441 task_count_needed = actual_tasks;
1442
1443 if (type == PSET_THING_THREAD) {
1444 /* for each thread (if any), make sure it's task is in the allowed list */
1445 for (i = used = 0; i < actual_threads; i++) {
1446 boolean_t found_task = FALSE;
1447
1448 task = get_threadtask(thread_list[i]);
1449 for (j = 0; j < actual_tasks; j++) {
1450 if (task_list[j] == task) {
1451 found_task = TRUE;
1452 break;
1453 }
1454 }
1455 if (found_task) {
1456 thread_list[used++] = thread_list[i];
1457 } else {
1458 thread_deallocate(thread_list[i]);
1459 }
1460 }
1461 actual_threads = used;
1462 thread_count_needed = actual_threads;
1463
1464 /* done with the task list */
1465 for (i = 0; i < actual_tasks; i++) {
1466 task_deallocate(task_list[i]);
1467 }
1468 mach_port_array_free(task_addr, task_count_cur);
1469 task_list = NULL;
1470 task_count_cur = 0;
1471 actual_tasks = 0;
1472 }
1473 #endif
1474
1475 if (type == PSET_THING_THREAD) {
1476 if (actual_threads == 0) {
1477 /* no threads available to return */
1478 assert(task_count_cur == 0);
1479 mach_port_array_free(thread_addr, thread_count_cur);
1480 thread_list = NULL;
1481 *thing_list = NULL;
1482 *countp = 0;
1483 return KERN_SUCCESS;
1484 }
1485 count_needed = actual_threads;
1486 count = thread_count_cur;
1487 addr = thread_addr;
1488 } else {
1489 if (actual_tasks == 0) {
1490 /* no tasks available to return */
1491 assert(thread_count_cur == 0);
1492 mach_port_array_free(task_addr, task_count_cur);
1493 *thing_list = NULL;
1494 *countp = 0;
1495 return KERN_SUCCESS;
1496 }
1497 count_needed = actual_tasks;
1498 count = task_count_cur;
1499 addr = task_addr;
1500 }
1501
1502 /* if we allocated too much, must copy */
1503 if (count_needed < count) {
1504 newaddr = mach_port_array_alloc(count_needed, Z_WAITOK | Z_ZERO);
1505 if (newaddr == NULL) {
1506 for (i = 0; i < actual_tasks; i++) {
1507 if (type == PSET_THING_THREAD) {
1508 thread_deallocate(thread_list[i]);
1509 } else {
1510 task_deallocate(task_list[i]);
1511 }
1512 }
1513 mach_port_array_free(addr, count);
1514 return KERN_RESOURCE_SHORTAGE;
1515 }
1516
1517 bcopy(addr, newaddr, count_needed * sizeof(void *));
1518 mach_port_array_free(addr, count);
1519
1520 addr = newaddr;
1521 count = count_needed;
1522 }
1523
1524 *thing_list = addr;
1525 *countp = (mach_msg_type_number_t)count;
1526
1527 return KERN_SUCCESS;
1528 }
1529
1530 /*
1531 * processor_set_tasks:
1532 *
1533 * List all tasks in the processor set.
1534 */
1535 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1536 processor_set_tasks_internal(
1537 processor_set_t pset,
1538 task_array_t *task_list,
1539 mach_msg_type_number_t *count,
1540 mach_task_flavor_t flavor)
1541 {
1542 kern_return_t ret;
1543
1544 ret = processor_set_things(pset, task_list, count, PSET_THING_TASK, flavor);
1545 if (ret != KERN_SUCCESS) {
1546 return ret;
1547 }
1548
1549 /* do the conversion that Mig should handle */
1550 convert_task_array_to_ports(*task_list, *count, flavor);
1551 return KERN_SUCCESS;
1552 }
1553
1554 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1555 processor_set_tasks(
1556 processor_set_t pset,
1557 task_array_t *task_list,
1558 mach_msg_type_number_t *count)
1559 {
1560 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1561 }
1562
1563 /*
1564 * processor_set_tasks_with_flavor:
1565 *
1566 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1567 */
1568 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1569 processor_set_tasks_with_flavor(
1570 processor_set_t pset,
1571 mach_task_flavor_t flavor,
1572 task_array_t *task_list,
1573 mach_msg_type_number_t *count)
1574 {
1575 switch (flavor) {
1576 case TASK_FLAVOR_CONTROL:
1577 case TASK_FLAVOR_READ:
1578 case TASK_FLAVOR_INSPECT:
1579 case TASK_FLAVOR_NAME:
1580 return processor_set_tasks_internal(pset, task_list, count, flavor);
1581 default:
1582 return KERN_INVALID_ARGUMENT;
1583 }
1584 }
1585
1586 /*
1587 * processor_set_threads:
1588 *
1589 * List all threads in the processor set.
1590 */
1591 #if defined(SECURE_KERNEL)
1592 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1593 processor_set_threads(
1594 __unused processor_set_t pset,
1595 __unused thread_act_array_t *thread_list,
1596 __unused mach_msg_type_number_t *count)
1597 {
1598 return KERN_FAILURE;
1599 }
1600 #elif !defined(XNU_TARGET_OS_OSX)
1601 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_act_array_t * thread_list,__unused mach_msg_type_number_t * count)1602 processor_set_threads(
1603 __unused processor_set_t pset,
1604 __unused thread_act_array_t *thread_list,
1605 __unused mach_msg_type_number_t *count)
1606 {
1607 return KERN_NOT_SUPPORTED;
1608 }
1609 #else
1610 kern_return_t
processor_set_threads(processor_set_t pset,thread_act_array_t * thread_list,mach_msg_type_number_t * count)1611 processor_set_threads(
1612 processor_set_t pset,
1613 thread_act_array_t *thread_list,
1614 mach_msg_type_number_t *count)
1615 {
1616 kern_return_t ret;
1617
1618 ret = processor_set_things(pset, thread_list, count,
1619 PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1620 if (ret != KERN_SUCCESS) {
1621 return ret;
1622 }
1623
1624 /* do the conversion that Mig should handle */
1625 convert_thread_array_to_ports(*thread_list, *count, TASK_FLAVOR_CONTROL);
1626 return KERN_SUCCESS;
1627 }
1628 #endif
1629
1630 pset_cluster_type_t
recommended_pset_type(thread_t thread)1631 recommended_pset_type(thread_t thread)
1632 {
1633 #if CONFIG_THREAD_GROUPS && __AMP__
1634 if (thread == THREAD_NULL) {
1635 return PSET_AMP_E;
1636 }
1637
1638 #if DEVELOPMENT || DEBUG
1639 extern bool system_ecore_only;
1640 extern int enable_task_set_cluster_type;
1641 task_t task = get_threadtask(thread);
1642 if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
1643 processor_set_t pset_hint = task->pset_hint;
1644 if (pset_hint) {
1645 return pset_hint->pset_cluster_type;
1646 }
1647 }
1648
1649 if (system_ecore_only) {
1650 return PSET_AMP_E;
1651 }
1652 #endif
1653
1654 if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
1655 return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
1656 }
1657
1658 if (thread->base_pri <= MAXPRI_THROTTLE) {
1659 if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1660 return PSET_AMP_E;
1661 }
1662 } else if (thread->base_pri <= BASEPRI_UTILITY) {
1663 if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1664 return PSET_AMP_E;
1665 }
1666 }
1667
1668 struct thread_group *tg = thread_group_get(thread);
1669 cluster_type_t recommendation = thread_group_recommendation(tg);
1670 switch (recommendation) {
1671 case CLUSTER_TYPE_SMP:
1672 default:
1673 if (get_threadtask(thread) == kernel_task) {
1674 return PSET_AMP_E;
1675 }
1676 return PSET_AMP_P;
1677 case CLUSTER_TYPE_E:
1678 return PSET_AMP_E;
1679 case CLUSTER_TYPE_P:
1680 return PSET_AMP_P;
1681 }
1682 #else
1683 (void)thread;
1684 return PSET_SMP;
1685 #endif
1686 }
1687
1688 #if CONFIG_THREAD_GROUPS && __AMP__
1689
1690 void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class,boolean_t inherit)1691 sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
1692 {
1693 sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;
1694
1695 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);
1696
1697 switch (perfctl_class) {
1698 case PERFCONTROL_CLASS_UTILITY:
1699 os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
1700 break;
1701 case PERFCONTROL_CLASS_BACKGROUND:
1702 os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
1703 break;
1704 default:
1705 panic("perfctl_class invalid");
1706 break;
1707 }
1708 }
1709
1710 #elif defined(__arm64__)
1711
1712 /* Define a stub routine since this symbol is exported on all arm64 platforms */
1713 void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class,__unused boolean_t inherit)1714 sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
1715 {
1716 }
1717
1718 #endif /* defined(__arm64__) */
1719