1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84
85 #include <security/mac_mach_internal.h>
86
87 #if defined(CONFIG_XNUPOST)
88
89 #include <tests/xnupost.h>
90
91 #endif /* CONFIG_XNUPOST */
92
93 /*
94 * Exported interface
95 */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98 #include <san/kcov.h>
99
100 /*
101 * The first pset and the pset_node are created by default for all platforms.
102 * Those typically represent the boot-cluster. For AMP platforms, all clusters
103 * of the same type are part of the same pset_node. This allows for easier
104 * CPU selection logic.
105 */
106 struct processor_set pset0;
107 struct pset_node pset_node0;
108
109 #if __AMP__
110 struct pset_node pset_node1;
111 pset_node_t ecore_node;
112 pset_node_t pcore_node;
113 #endif
114
115 LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);
116
117 LCK_GRP_DECLARE(pset_lck_grp, "pset");
118
119 queue_head_t tasks;
120 queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */
121 queue_head_t corpse_tasks;
122 int tasks_count;
123 int terminated_tasks_count;
124 queue_head_t threads;
125 queue_head_t terminated_threads;
126 int threads_count;
127 int terminated_threads_count;
128 LCK_GRP_DECLARE(task_lck_grp, "task");
129 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
130 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
131 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
132
133 processor_t processor_list;
134 unsigned int processor_count;
135 static processor_t processor_list_tail;
136 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
137
138 uint32_t processor_avail_count;
139 uint32_t processor_avail_count_user;
140 uint32_t primary_processor_avail_count;
141 uint32_t primary_processor_avail_count_user;
142
143 SECURITY_READ_ONLY_LATE(int) master_cpu = 0;
144
145 struct processor PERCPU_DATA(processor);
146 processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
147 processor_set_t pset_array[MAX_PSETS] = { 0 };
148
149 static timer_call_func_t running_timer_funcs[] = {
150 [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
151 [RUNNING_TIMER_KPERF] = kperf_timer_expire,
152 };
153 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
154 == RUNNING_TIMER_MAX, "missing running timer function");
155
#if defined(CONFIG_XNUPOST)
kern_return_t ipi_test(void);
extern void arm64_ipi_test(void);

/*
 * XNUPOST: bind the calling thread to each registered processor in turn and
 * run the arm64 IPI self-test there, then drop the binding. No-op (pass)
 * on non-arm64 platforms.
 */
kern_return_t
ipi_test(void)
{
#if __arm64__
	for (processor_t cur = processor_list; cur != NULL; cur = cur->processor_list) {
		/* Migrate onto `cur` before firing the test IPIs. */
		thread_bind(cur);
		thread_block(THREAD_CONTINUE_NULL);
		kprintf("Running IPI test on cpu %d\n", cur->cpu_id);
		arm64_ipi_test();
	}

	/* unbind thread from specific cpu */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);

	T_PASS("Done running IPI tests");
#else
	T_PASS("Unsupported platform. Not running IPI tests");

#endif /* __arm64__ */

	return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */
186
/*
 * Non-zero when secondary SMT processors may be enabled for scheduling
 * (consulted in processor_startup()). NOTE(review): presumably overridable
 * via a boot-arg elsewhere — confirm.
 */
int sched_enable_smt = 1;

/*
 * Bootstrap the boot pset node / pset topology and the master processor.
 * Runs once, early in boot, before secondary CPUs register. On AMP systems
 * it also partitions E- and P-clusters onto separate pset nodes.
 */
void
processor_bootstrap(void)
{
	/* Initialize PSET node and PSET associated with boot cluster */
	pset_node0.psets = &pset0;
	pset_node0.pset_cluster_type = PSET_SMP;

#if __AMP__
	const ml_topology_info_t *topology_info = ml_get_topology_info();

	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset0.pset_id = boot_cluster->cluster_id;
	pset0.pset_cluster_id = boot_cluster->cluster_id;
	if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
		/* Boot cluster is efficiency: node0 carries E clusters, node1 carries P. */
		pset0.pset_cluster_type = PSET_AMP_E;
		pset_node0.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node1;
	} else {
		/* Boot cluster is performance: mirror of the assignment above. */
		pset0.pset_cluster_type = PSET_AMP_P;
		pset_node0.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node1;
	}

	/* Link pset_node1 to pset_node0 */
	pset_node0.node_list = &pset_node1;
#endif

	/* Initialize the boot pset, then the global task/thread queues. */
	pset_init(&pset0, &pset_node0);
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);

	processor_init(master_processor, master_cpu, &pset0);
}
235
236 /*
237 * Initialize the given processor for the cpu
238 * indicated by cpu_id, and assign to the
239 * specified processor set.
240 */
241 void
processor_init(processor_t processor,int cpu_id,processor_set_t pset)242 processor_init(
243 processor_t processor,
244 int cpu_id,
245 processor_set_t pset)
246 {
247 spl_t s;
248
249 assert(cpu_id < MAX_SCHED_CPUS);
250 processor->cpu_id = cpu_id;
251
252 if (processor != master_processor) {
253 /* Scheduler state for master_processor initialized in sched_init() */
254 SCHED(processor_init)(processor);
255 }
256
257 processor->state = PROCESSOR_OFF_LINE;
258 processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
259 processor->processor_set = pset;
260 processor_state_update_idle(processor);
261 processor->starting_pri = MINPRI;
262 processor->quantum_end = UINT64_MAX;
263 processor->deadline = UINT64_MAX;
264 processor->first_timeslice = FALSE;
265 processor->processor_offlined = false;
266 processor->processor_primary = processor; /* no SMT relationship known at this point */
267 processor->processor_secondary = NULL;
268 processor->is_SMT = false;
269 processor->is_recommended = true;
270 processor->processor_self = IP_NULL;
271 processor->processor_list = NULL;
272 processor->must_idle = false;
273 processor->last_startup_reason = REASON_SYSTEM;
274 processor->last_shutdown_reason = REASON_NONE;
275 processor->shutdown_temporary = false;
276 processor->shutdown_locked = false;
277 processor->last_recommend_reason = REASON_SYSTEM;
278 processor->last_derecommend_reason = REASON_NONE;
279 processor->running_timers_active = false;
280 for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
281 timer_call_setup(&processor->running_timers[i],
282 running_timer_funcs[i], processor);
283 running_timer_clear(processor, i);
284 }
285 recount_processor_init(processor);
286 simple_lock_init(&processor->start_state_lock, 0);
287
288 s = splsched();
289 pset_lock(pset);
290 bit_set(pset->cpu_bitmask, cpu_id);
291 bit_set(pset->recommended_bitmask, cpu_id);
292 bit_set(pset->primary_map, cpu_id);
293 bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
294 if (pset->cpu_set_count++ == 0) {
295 pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
296 } else {
297 pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
298 pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
299 }
300 pset_unlock(pset);
301 splx(s);
302
303 simple_lock(&processor_list_lock, LCK_GRP_NULL);
304 if (processor_list == NULL) {
305 processor_list = processor;
306 } else {
307 processor_list_tail->processor_list = processor;
308 }
309 processor_list_tail = processor;
310 processor_count++;
311 simple_unlock(&processor_list_lock);
312 processor_array[cpu_id] = processor;
313 }
314
315 bool system_is_SMT = false;
316
317 void
processor_set_primary(processor_t processor,processor_t primary)318 processor_set_primary(
319 processor_t processor,
320 processor_t primary)
321 {
322 assert(processor->processor_primary == primary || processor->processor_primary == processor);
323 /* Re-adjust primary point for this (possibly) secondary processor */
324 processor->processor_primary = primary;
325
326 assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
327 if (primary != processor) {
328 /* Link primary to secondary, assumes a 2-way SMT model
329 * We'll need to move to a queue if any future architecture
330 * requires otherwise.
331 */
332 assert(processor->processor_secondary == NULL);
333 primary->processor_secondary = processor;
334 /* Mark both processors as SMT siblings */
335 primary->is_SMT = TRUE;
336 processor->is_SMT = TRUE;
337
338 if (!system_is_SMT) {
339 system_is_SMT = true;
340 sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
341 }
342
343 processor_set_t pset = processor->processor_set;
344 spl_t s = splsched();
345 pset_lock(pset);
346 if (!pset->is_SMT) {
347 pset->is_SMT = true;
348 }
349 bit_clear(pset->primary_map, processor->cpu_id);
350 pset_unlock(pset);
351 splx(s);
352 }
353 }
354
/*
 * Return the processor set the given processor is currently assigned to.
 */
processor_set_t
processor_pset(
	processor_t     processor)
{
	return processor->processor_set;
}
361
362 #if CONFIG_SCHED_EDGE
363
/*
 * Return the cluster type of the pset with the given cluster id.
 * Assumes cluster_id indexes a populated slot of pset_array — callers
 * must guarantee the pset has been published (see pset_init()).
 */
cluster_type_t
pset_type_for_id(uint32_t cluster_id)
{
	return pset_array[cluster_id]->pset_type;
}
369
370 /*
371 * Processor foreign threads
372 *
373 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
374 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
375 * if its of a different type than its preferred cluster type (E/P). The bitmap should
376 * be updated every time a new thread is assigned to run on a processor. Cluster shared
377 * resource intensive threads are also not counted as foreign threads since these
378 * threads should not be rebalanced when running on non-preferred clusters.
379 *
380 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
381 * for rebalancing.
382 */
383 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)384 processor_state_update_running_foreign(processor_t processor, thread_t thread)
385 {
386 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
387 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
388
389 boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
390 boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
391 if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
392 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
393 } else {
394 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
395 }
396 }
397
398 /*
399 * Cluster shared resource intensive threads
400 *
401 * With the Edge scheduler, each pset maintains a bitmap of processors running
402 * threads that are shared resource intensive. This per-thread property is set
403 * by the performance controller or explicitly via dispatch SPIs. The bitmap
404 * allows the Edge scheduler to calculate the cluster shared resource load on
405 * any given cluster and load balance intensive threads accordingly.
406 */
407 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)408 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
409 {
410 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
411 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
412 } else {
413 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
414 }
415 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
416 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
417 } else {
418 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
419 }
420 }
421
422 #endif /* CONFIG_SCHED_EDGE */
423
/*
 * Reset the processor's current-thread tracking fields to idle values and
 * refresh the pset load average. Used when the processor has no runnable
 * thread on-core (see processor_init() and the idle path).
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
	processor->current_is_NO_SMT = false;
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* TH_BUCKET_SCHED_MAX marks this CPU as running no tracked bucket. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
	sched_update_pset_load_average(processor->processor_set, 0);
}
445
/*
 * Refresh the processor's current-thread tracking fields from the thread
 * going on-core.
 *
 * pset_lock_held: caller indicates whether it holds the pset lock; the pset
 * load average is only updated in that case.
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
	processor->current_is_NO_SMT = thread_no_smt(thread);
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		sched_update_pset_load_average(processor->processor_set, 0);
	}
}
473
/*
 * Set the processor's current-thread tracking fields to explicit values
 * supplied by the caller, rather than deriving them from a thread.
 * Clears the Edge scheduler's shared-resource bits for this CPU.
 */
void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
{
	processor->current_pri = pri;
	processor->current_sfi_class = sfi_class;
	processor->current_recommended_pset_type = pset_type;
	processor->current_perfctl_class = perfctl_class;
	processor->current_urgency = urgency;
#if CONFIG_SCHED_EDGE
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
}
489
/*
 * Return the root of the pset node list (the boot node).
 */
pset_node_t
pset_node_root(void)
{
	return &pset_node0;
}
495
496 LCK_GRP_DECLARE(pset_create_grp, "pset_create");
497 LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);
498
499 processor_set_t
pset_create(pset_node_t node,pset_cluster_type_t pset_type,uint32_t pset_cluster_id,int pset_id)500 pset_create(
501 pset_node_t node,
502 pset_cluster_type_t pset_type,
503 uint32_t pset_cluster_id,
504 int pset_id)
505 {
506 /* some schedulers do not support multiple psets */
507 if (SCHED(multiple_psets_enabled) == FALSE) {
508 return processor_pset(master_processor);
509 }
510
511 processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
512
513 if (pset != PROCESSOR_SET_NULL) {
514 pset->pset_cluster_type = pset_type;
515 pset->pset_cluster_id = pset_cluster_id;
516 pset->pset_id = pset_id;
517 pset_init(pset, node);
518
519 lck_spin_lock(&pset_node_lock);
520
521 prev = &node->psets;
522 while (*prev != PROCESSOR_SET_NULL) {
523 prev = &(*prev)->pset_list;
524 }
525
526 *prev = pset;
527
528 lck_spin_unlock(&pset_node_lock);
529 }
530
531 return pset;
532 }
533
534 /*
535 * Find processor set with specified cluster_id.
536 * Returns default_pset if not found.
537 */
538 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)539 pset_find(
540 uint32_t cluster_id,
541 processor_set_t default_pset)
542 {
543 lck_spin_lock(&pset_node_lock);
544 pset_node_t node = &pset_node0;
545 processor_set_t pset = NULL;
546
547 do {
548 pset = node->psets;
549 while (pset != NULL) {
550 if (pset->pset_cluster_id == cluster_id) {
551 break;
552 }
553 pset = pset->pset_list;
554 }
555 } while (pset == NULL && (node = node->node_list) != NULL);
556 lck_spin_unlock(&pset_node_lock);
557 if (pset == NULL) {
558 return default_pset;
559 }
560 return pset;
561 }
562
563 /*
564 * Initialize the given processor_set structure.
565 */
566 void
pset_init(processor_set_t pset,pset_node_t node)567 pset_init(
568 processor_set_t pset,
569 pset_node_t node)
570 {
571 pset->online_processor_count = 0;
572 pset->load_average = 0;
573 bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
574 pset->cpu_set_low = pset->cpu_set_hi = 0;
575 pset->cpu_set_count = 0;
576 pset->last_chosen = -1;
577 pset->cpu_bitmask = 0;
578 pset->recommended_bitmask = 0;
579 pset->primary_map = 0;
580 pset->realtime_map = 0;
581 pset->cpu_available_map = 0;
582
583 for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
584 pset->cpu_state_map[i] = 0;
585 }
586 pset->pending_AST_URGENT_cpu_mask = 0;
587 pset->pending_AST_PREEMPT_cpu_mask = 0;
588 #if defined(CONFIG_SCHED_DEFERRED_AST)
589 pset->pending_deferred_AST_cpu_mask = 0;
590 #endif
591 pset->pending_spill_cpu_mask = 0;
592 pset->rt_pending_spill_cpu_mask = 0;
593 pset_lock_init(pset);
594 pset->pset_self = IP_NULL;
595 pset->pset_name_self = IP_NULL;
596 pset->pset_list = PROCESSOR_SET_NULL;
597 pset->is_SMT = false;
598 #if CONFIG_SCHED_EDGE
599 bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
600 pset->cpu_running_foreign = 0;
601 for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
602 pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
603 pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
604 }
605 #endif /* CONFIG_SCHED_EDGE */
606 pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;
607
608 if (pset != &pset0) {
609 /*
610 * Scheduler runqueue initialization for non-boot psets.
611 * This initialization for pset0 happens in sched_init().
612 */
613 SCHED(pset_init)(pset);
614 SCHED(rt_init)(pset);
615 }
616
617 /*
618 * Because the pset_node_lock is not taken by every client of the pset_map,
619 * we need to make sure that the initialized pset contents are visible to any
620 * client that loads a non-NULL value from pset_array.
621 */
622 os_atomic_store(&pset_array[pset->pset_id], pset, release);
623
624 lck_spin_lock(&pset_node_lock);
625 bit_set(node->pset_map, pset->pset_id);
626 pset->node = node;
627 lck_spin_unlock(&pset_node_lock);
628 }
629
630 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)631 processor_info_count(
632 processor_flavor_t flavor,
633 mach_msg_type_number_t *count)
634 {
635 switch (flavor) {
636 case PROCESSOR_BASIC_INFO:
637 *count = PROCESSOR_BASIC_INFO_COUNT;
638 break;
639
640 case PROCESSOR_CPU_LOAD_INFO:
641 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
642 break;
643
644 default:
645 return cpu_info_count(flavor, count);
646 }
647
648 return KERN_SUCCESS;
649 }
650
/*
 * Accumulate this processor's user/system/idle CPU time into the supplied
 * ticks array, converting from mach time units to scheduler ticks.
 * Note: values are *added* to the existing array contents, not assigned —
 * callers (see processor_info()) zero the array first.
 */
void
processor_cpu_load_info(processor_t processor,
    natural_t ticks[static CPU_STATE_MAX])
{
	struct recount_usage usage = { 0 };
	uint64_t idle_time = 0;
	recount_processor_usage(&processor->pr_recount, &usage, &idle_time);

	ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_user_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_SYSTEM] += (uint32_t)(usage.ru_system_time_mach /
	    hz_tick_interval);
	ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
}
665
/*
 * Mach interface: return information about a processor.
 *
 * flavor selects the info structure filled into `info`; `count` is in/out
 * (capacity in, elements written out). On success, *host is set to the
 * real host port. Unknown flavors are delegated to the machine-dependent
 * cpu_info().
 */
kern_return_t
processor_info(
	processor_t             processor,
	processor_flavor_t      flavor,
	host_t                  *host,
	processor_info_t        info,
	mach_msg_type_number_t  *count)
{
	int     cpu_id, state;
	kern_return_t   result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t          basic_info;

		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/* Offline (or going offline) CPUs report "not running" unless the
		 * shutdown is temporary; on x86_64 a non-recommended CPU also
		 * reports "not running". */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t       cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* Zero the tick counters first: processor_cpu_load_info()
		 * accumulates into them rather than assigning. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
749
/*
 * Block until the processor leaves the PROCESSOR_START state.
 * Waits in 1-second timed intervals under start_state_lock; panics if a
 * wait times out, since a CPU stuck in START indicates a failed bring-up.
 */
void
processor_wait_for_start(processor_t processor)
{
	spl_t s = splsched();
	simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	while (processor->state == PROCESSOR_START) {
		/* Arm the wait while still holding the lock so a concurrent
		 * state change cannot be missed between check and sleep. */
		assert_wait_timeout((event_t)&processor->state, THREAD_UNINT, 1000, 1000 * NSEC_PER_USEC); /* 1 second */
		simple_unlock(&processor->start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("%s>cpu %d failed to start\n", __FUNCTION__, processor->cpu_id);
		}

		s = splsched();
		simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	}
	simple_unlock(&processor->start_state_lock);
	splx(s);
}
771
/* Serializes processor bring-up and shutdown paths. */
LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);

/*
 * Bring a processor online.
 *
 * Serialized by processor_updown_lock. The master processor takes a short
 * path (bind, cpu_start, unbind). For other processors: transition the pset
 * state to PROCESSOR_START, create the idle and (first time only) startup
 * threads, then power on and start the CPU. LOCK_STATE/UNLOCK_STATE flags
 * are only valid with REASON_SYSTEM; SHUTDOWN_TEMPORARY only restarts a CPU
 * that was temporarily shut down; WAIT_FOR_START blocks until the CPU has
 * left the START state.
 */
static kern_return_t
processor_startup(
	processor_t             processor,
	processor_reason_t      reason,
	uint32_t                flags)
{
	processor_set_t         pset;
	thread_t                thread;
	kern_return_t           result;
	spl_t                   s;

	if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* State-locking flags are reserved for system-initiated transitions. */
	if ((flags & (LOCK_STATE | UNLOCK_STATE)) && (reason != REASON_SYSTEM)) {
		return KERN_INVALID_ARGUMENT;
	}

	lck_mtx_lock(&processor_updown_lock);

	if (processor == master_processor) {
		processor_t             prev;

		processor->last_startup_reason = reason;

		ml_cpu_power_enable(processor->cpu_id);

		/* Run on the master processor itself while starting it. */
		prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		result = cpu_start(processor->cpu_id);

		thread_bind(prev);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}

	bool scheduler_disable = false;

	if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
		if (cpu_can_exit(processor->cpu_id)) {
			lck_mtx_unlock(&processor_updown_lock);
			return KERN_SUCCESS;
		}
		/*
		 * This secondary SMT processor must start in order to service interrupts,
		 * so instead it will be disabled at the scheduler level.
		 */
		scheduler_disable = true;
	}

	s = splsched();
	pset = processor->processor_set;
	pset_lock(pset);
	if (flags & LOCK_STATE) {
		processor->shutdown_locked = true;
	} else if (flags & UNLOCK_STATE) {
		processor->shutdown_locked = false;
	}

	/* Someone else is already starting this CPU: just wait for it. */
	if (processor->state == PROCESSOR_START) {
		pset_unlock(pset);
		splx(s);

		processor_wait_for_start(processor);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_SUCCESS;
	}

	/* Only an offline CPU can be started; SHUTDOWN_TEMPORARY only applies
	 * to CPUs that were temporarily shut down. */
	if ((processor->state != PROCESSOR_OFF_LINE) || ((flags & SHUTDOWN_TEMPORARY) && !processor->shutdown_temporary)) {
		pset_unlock(pset);
		splx(s);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_FAILURE;
	}

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	processor->last_startup_reason = reason;
	pset_unlock(pset);
	splx(s);

	/*
	 *	Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		result = idle_thread_create(processor);
		if (result != KERN_SUCCESS) {
			/* Unwind: back to OFF_LINE on failure. */
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}
	}

	/*
	 *	If there is no active thread, the processor
	 *	has never been started.  Create a dedicated
	 *	start up thread.
	 */
	if (processor->active_thread == THREAD_NULL &&
	    processor->startup_thread == THREAD_NULL) {
		result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
		if (result != KERN_SUCCESS) {
			/* Unwind: back to OFF_LINE on failure. */
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}

		/* Bind the startup thread to this processor and mark it runnable. */
		s = splsched();
		thread_lock(thread);
		thread->bound_processor = processor;
		processor->startup_thread = thread;
		thread->state = TH_RUN;
		thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
		thread_unlock(thread);
		splx(s);

		thread_deallocate(thread);
	}

	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
	result = cpu_start(processor->cpu_id);
#if defined (__arm__) || defined (__arm64__)
	/* On ARM, cpu_start is expected to always succeed here. */
	assert(result == KERN_SUCCESS);
#else
	if (result != KERN_SUCCESS) {
		/* Unwind: back to OFF_LINE on failure. */
		s = splsched();
		pset_lock(pset);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		pset_unlock(pset);
		splx(s);
		ml_cpu_end_state_transition(processor->cpu_id);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}
#endif
	if (scheduler_disable) {
		assert(processor->processor_primary != processor);
		sched_processor_enable(processor, FALSE);
	}

	if (flags & WAIT_FOR_START) {
		processor_wait_for_start(processor);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

#if CONFIG_KCOV
	kcov_start_cpu(processor->cpu_id);
#endif

	lck_mtx_unlock(&processor_updown_lock);
	return KERN_SUCCESS;
}
950
951 kern_return_t
processor_exit_reason(processor_t processor,processor_reason_t reason,uint32_t flags)952 processor_exit_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
953 {
954 if (processor == PROCESSOR_NULL) {
955 return KERN_INVALID_ARGUMENT;
956 }
957
958 if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
959 #ifdef RHODES_CLUSTER_POWERDOWN_WORKAROUND
960 /*
961 * Must allow CLPC to finish powering down the whole cluster,
962 * or IOCPUSleepKernel() will fail to restart the offline cpus.
963 */
964 if (reason != REASON_CLPC_SYSTEM) {
965 return KERN_FAILURE;
966 }
967 #else
968 return KERN_FAILURE;
969 #endif
970 }
971
972 if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
973 return sched_processor_enable(processor, FALSE);
974 } else if ((reason == REASON_SYSTEM) || cpu_can_exit(processor->cpu_id)) {
975 return processor_shutdown(processor, reason, flags);
976 }
977
978 return KERN_INVALID_ARGUMENT;
979 }
980
981 kern_return_t
processor_exit(processor_t processor)982 processor_exit(
983 processor_t processor)
984 {
985 return processor_exit_reason(processor, REASON_SYSTEM, 0);
986 }
987
988 kern_return_t
processor_exit_from_user(processor_t processor)989 processor_exit_from_user(
990 processor_t processor)
991 {
992 return processor_exit_reason(processor, REASON_USER, 0);
993 }
994
995 kern_return_t
processor_start_reason(processor_t processor,processor_reason_t reason,uint32_t flags)996 processor_start_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
997 {
998 if (processor == PROCESSOR_NULL) {
999 return KERN_INVALID_ARGUMENT;
1000 }
1001
1002 if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
1003 return KERN_FAILURE;
1004 }
1005
1006 if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
1007 return sched_processor_enable(processor, TRUE);
1008 } else {
1009 return processor_startup(processor, reason, flags);
1010 }
1011 }
1012
1013 kern_return_t
processor_start(processor_t processor)1014 processor_start(
1015 processor_t processor)
1016 {
1017 return processor_start_reason(processor, REASON_SYSTEM, 0);
1018 }
1019
1020 kern_return_t
processor_start_from_user(processor_t processor)1021 processor_start_from_user(
1022 processor_t processor)
1023 {
1024 return processor_start_reason(processor, REASON_USER, 0);
1025 }
1026
1027 kern_return_t
enable_smt_processors(bool enable)1028 enable_smt_processors(bool enable)
1029 {
1030 if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
1031 /* Not an SMT system */
1032 return KERN_INVALID_ARGUMENT;
1033 }
1034
1035 int ncpus = machine_info.logical_cpu_max;
1036
1037 for (int i = 1; i < ncpus; i++) {
1038 processor_t processor = processor_array[i];
1039
1040 if (processor->processor_primary != processor) {
1041 if (enable) {
1042 processor_start_from_user(processor);
1043 } else { /* Disable */
1044 processor_exit_from_user(processor);
1045 }
1046 }
1047 }
1048
1049 #define BSD_HOST 1
1050 host_basic_info_data_t hinfo;
1051 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
1052 kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
1053 if (kret != KERN_SUCCESS) {
1054 return kret;
1055 }
1056
1057 if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
1058 return KERN_FAILURE;
1059 }
1060
1061 if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
1062 return KERN_FAILURE;
1063 }
1064
1065 return KERN_SUCCESS;
1066 }
1067
1068 bool
processor_should_kprintf(processor_t processor,bool starting)1069 processor_should_kprintf(processor_t processor, bool starting)
1070 {
1071 processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1072
1073 return reason != REASON_CLPC_SYSTEM;
1074 }
1075
1076 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1077 processor_control(
1078 processor_t processor,
1079 processor_info_t info,
1080 mach_msg_type_number_t count)
1081 {
1082 if (processor == PROCESSOR_NULL) {
1083 return KERN_INVALID_ARGUMENT;
1084 }
1085
1086 return cpu_control(processor->cpu_id, info, count);
1087 }
1088
1089 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1090 processor_get_assignment(
1091 processor_t processor,
1092 processor_set_t *pset)
1093 {
1094 int state;
1095
1096 if (processor == PROCESSOR_NULL) {
1097 return KERN_INVALID_ARGUMENT;
1098 }
1099
1100 state = processor->state;
1101 if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1102 return KERN_FAILURE;
1103 }
1104
1105 *pset = &pset0;
1106
1107 return KERN_SUCCESS;
1108 }
1109
1110 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1111 processor_set_info(
1112 processor_set_t pset,
1113 int flavor,
1114 host_t *host,
1115 processor_set_info_t info,
1116 mach_msg_type_number_t *count)
1117 {
1118 if (pset == PROCESSOR_SET_NULL) {
1119 return KERN_INVALID_ARGUMENT;
1120 }
1121
1122 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1123 processor_set_basic_info_t basic_info;
1124
1125 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1126 return KERN_FAILURE;
1127 }
1128
1129 basic_info = (processor_set_basic_info_t) info;
1130 #if defined(__x86_64__)
1131 basic_info->processor_count = processor_avail_count_user;
1132 #else
1133 basic_info->processor_count = processor_avail_count;
1134 #endif
1135 basic_info->default_policy = POLICY_TIMESHARE;
1136
1137 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1138 *host = &realhost;
1139 return KERN_SUCCESS;
1140 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1141 policy_timeshare_base_t ts_base;
1142
1143 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1144 return KERN_FAILURE;
1145 }
1146
1147 ts_base = (policy_timeshare_base_t) info;
1148 ts_base->base_priority = BASEPRI_DEFAULT;
1149
1150 *count = POLICY_TIMESHARE_BASE_COUNT;
1151 *host = &realhost;
1152 return KERN_SUCCESS;
1153 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1154 policy_fifo_base_t fifo_base;
1155
1156 if (*count < POLICY_FIFO_BASE_COUNT) {
1157 return KERN_FAILURE;
1158 }
1159
1160 fifo_base = (policy_fifo_base_t) info;
1161 fifo_base->base_priority = BASEPRI_DEFAULT;
1162
1163 *count = POLICY_FIFO_BASE_COUNT;
1164 *host = &realhost;
1165 return KERN_SUCCESS;
1166 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1167 policy_rr_base_t rr_base;
1168
1169 if (*count < POLICY_RR_BASE_COUNT) {
1170 return KERN_FAILURE;
1171 }
1172
1173 rr_base = (policy_rr_base_t) info;
1174 rr_base->base_priority = BASEPRI_DEFAULT;
1175 rr_base->quantum = 1;
1176
1177 *count = POLICY_RR_BASE_COUNT;
1178 *host = &realhost;
1179 return KERN_SUCCESS;
1180 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1181 policy_timeshare_limit_t ts_limit;
1182
1183 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1184 return KERN_FAILURE;
1185 }
1186
1187 ts_limit = (policy_timeshare_limit_t) info;
1188 ts_limit->max_priority = MAXPRI_KERNEL;
1189
1190 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1191 *host = &realhost;
1192 return KERN_SUCCESS;
1193 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1194 policy_fifo_limit_t fifo_limit;
1195
1196 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1197 return KERN_FAILURE;
1198 }
1199
1200 fifo_limit = (policy_fifo_limit_t) info;
1201 fifo_limit->max_priority = MAXPRI_KERNEL;
1202
1203 *count = POLICY_FIFO_LIMIT_COUNT;
1204 *host = &realhost;
1205 return KERN_SUCCESS;
1206 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1207 policy_rr_limit_t rr_limit;
1208
1209 if (*count < POLICY_RR_LIMIT_COUNT) {
1210 return KERN_FAILURE;
1211 }
1212
1213 rr_limit = (policy_rr_limit_t) info;
1214 rr_limit->max_priority = MAXPRI_KERNEL;
1215
1216 *count = POLICY_RR_LIMIT_COUNT;
1217 *host = &realhost;
1218 return KERN_SUCCESS;
1219 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1220 int *enabled;
1221
1222 if (*count < (sizeof(*enabled) / sizeof(int))) {
1223 return KERN_FAILURE;
1224 }
1225
1226 enabled = (int *) info;
1227 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1228
1229 *count = sizeof(*enabled) / sizeof(int);
1230 *host = &realhost;
1231 return KERN_SUCCESS;
1232 }
1233
1234
1235 *host = HOST_NULL;
1236 return KERN_INVALID_ARGUMENT;
1237 }
1238
1239 /*
1240 * processor_set_statistics
1241 *
1242 * Returns scheduling statistics for a processor set.
1243 */
1244 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1245 processor_set_statistics(
1246 processor_set_t pset,
1247 int flavor,
1248 processor_set_info_t info,
1249 mach_msg_type_number_t *count)
1250 {
1251 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1252 return KERN_INVALID_PROCESSOR_SET;
1253 }
1254
1255 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1256 processor_set_load_info_t load_info;
1257
1258 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1259 return KERN_FAILURE;
1260 }
1261
1262 load_info = (processor_set_load_info_t) info;
1263
1264 load_info->mach_factor = sched_mach_factor;
1265 load_info->load_average = sched_load_average;
1266
1267 load_info->task_count = tasks_count;
1268 load_info->thread_count = threads_count;
1269
1270 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1271 return KERN_SUCCESS;
1272 }
1273
1274 return KERN_INVALID_ARGUMENT;
1275 }
1276
1277 /*
1278 * processor_set_things:
1279 *
1280 * Common internals for processor_set_{threads,tasks}
1281 */
static kern_return_t
processor_set_things(
	processor_set_t pset,
	void **thing_list,
	mach_msg_type_number_t *countp,
	int type,
	mach_task_flavor_t flavor)
{
	unsigned int i;
	task_t task;
	thread_t thread;

	/* Task candidate array plus its allocated/used bookkeeping. */
	task_t *task_list;
	vm_size_t actual_tasks, task_count_cur, task_count_needed;

	/* Thread candidate array plus its allocated/used bookkeeping. */
	thread_t *thread_list;
	vm_size_t actual_threads, thread_count_cur, thread_count_needed;

	void *addr, *newaddr;
	vm_size_t count, count_needed;

	/* Only the single, default pset is supported. */
	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_ARGUMENT;
	}

	task_count_cur = 0;
	task_count_needed = 0;
	task_list = NULL;
	actual_tasks = 0;

	thread_count_cur = 0;
	thread_count_needed = 0;
	thread_list = NULL;
	actual_threads = 0;

	/*
	 * Sizing loop: read the global counts under tasks_threads_lock,
	 * drop the lock to allocate, and retry until the arrays are large
	 * enough for the counts observed while holding the lock.  The loop
	 * exits with the lock HELD.
	 */
	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		if (type == PSET_THING_THREAD) {
			thread_count_needed = threads_count;
		}
#if !CONFIG_MACF
		else
#endif
		/*
		 * With MACF the task array is always sized too, since threads
		 * are later filtered by the MAC check on their owning task.
		 */
		task_count_needed = tasks_count;

		if (task_count_needed <= task_count_cur &&
		    thread_count_needed <= thread_count_cur) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (task_count_needed > task_count_cur) {
			kfree_type(task_t, task_count_cur, task_list);
			assert(task_count_needed > 0);
			task_count_cur = task_count_needed;

			task_list = kalloc_type(task_t, task_count_cur, Z_WAITOK | Z_ZERO);
			if (task_list == NULL) {
				kfree_type(thread_t, thread_count_cur, thread_list);
				return KERN_RESOURCE_SHORTAGE;
			}
		}

		/* grow thread array */
		if (thread_count_needed > thread_count_cur) {
			kfree_type(thread_t, thread_count_cur, thread_list);

			assert(thread_count_needed > 0);
			thread_count_cur = thread_count_needed;

			thread_list = kalloc_type(thread_t, thread_count_cur, Z_WAITOK | Z_ZERO);
			if (thread_list == NULL) {
				kfree_type(task_t, task_count_cur, task_list);
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	/* OK, have memory and the list locked */

	/* If we need it, get the thread list */
	if (type == PSET_THING_THREAD) {
		queue_iterate(&threads, thread, thread_t, threads) {
			task = get_threadtask(thread);
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip threads belonging to kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			/* take a ref so the thread survives the lock drop below */
			thread_reference(thread);
			thread_list[actual_threads++] = thread;
		}
	}
#if !CONFIG_MACF
	else
#endif
	{
		/* get a list of the tasks */
		queue_iterate(&tasks, task, task_t, tasks) {
#if defined(SECURE_KERNEL)
			if (task == kernel_task) {
				/* skip kernel_task */
				continue;
			}
#endif
			if (!task->ipc_active || task_is_exec_copy(task)) {
				/* skip inactive tasks (in the middle of exec/fork/spawn) */
				continue;
			}

			/* take a ref so the task survives the lock drop below */
			task_reference(task);
			task_list[actual_tasks++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

#if CONFIG_MACF
	unsigned int j, used;

	/* for each task, make sure we are allowed to examine it */
	for (i = used = 0; i < actual_tasks; i++) {
		if (mac_task_check_expose_task(task_list[i], flavor)) {
			task_deallocate(task_list[i]);
			continue;
		}
		task_list[used++] = task_list[i];
	}
	actual_tasks = used;
	task_count_needed = actual_tasks;

	if (type == PSET_THING_THREAD) {
		/* for each thread (if any), make sure it's task is in the allowed list */
		for (i = used = 0; i < actual_threads; i++) {
			boolean_t found_task = FALSE;

			task = get_threadtask(thread_list[i]);
			for (j = 0; j < actual_tasks; j++) {
				if (task_list[j] == task) {
					found_task = TRUE;
					break;
				}
			}
			if (found_task) {
				thread_list[used++] = thread_list[i];
			} else {
				thread_deallocate(thread_list[i]);
			}
		}
		actual_threads = used;
		thread_count_needed = actual_threads;

		/* done with the task list */
		for (i = 0; i < actual_tasks; i++) {
			task_deallocate(task_list[i]);
		}
		kfree_type(task_t, task_count_cur, task_list);
		task_count_cur = 0;
		actual_tasks = 0;
		task_list = NULL;
	}
#endif

	if (type == PSET_THING_THREAD) {
		if (actual_threads == 0) {
			/* no threads available to return */
			assert(task_count_cur == 0);
			kfree_type(thread_t, thread_count_cur, thread_list);
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_threads;
		count = thread_count_cur;
		addr = thread_list;
	} else {
		if (actual_tasks == 0) {
			/* no tasks available to return */
			assert(thread_count_cur == 0);
			kfree_type(task_t, task_count_cur, task_list);
			*thing_list = NULL;
			*countp = 0;
			return KERN_SUCCESS;
		}
		count_needed = actual_tasks;
		count = task_count_cur;
		addr = task_list;
	}

	/* if we allocated too much, must copy */
	if (count_needed < count) {
		newaddr = kalloc_type(void *, count_needed, Z_WAITOK | Z_ZERO);
		if (newaddr == 0) {
			/*
			 * NOTE(review): this cleanup loop is bounded by actual_tasks,
			 * which is 0 on the PSET_THING_THREAD path (reset above under
			 * CONFIG_MACF, never incremented otherwise), so thread refs
			 * appear not to be released on this failure path — verify.
			 */
			for (i = 0; i < actual_tasks; i++) {
				if (type == PSET_THING_THREAD) {
					thread_deallocate(thread_list[i]);
				} else {
					task_deallocate(task_list[i]);
				}
			}
			kfree_type(void *, count, addr);
			return KERN_RESOURCE_SHORTAGE;
		}

		bcopy(addr, newaddr, count_needed * sizeof(void *));
		kfree_type(void *, count, addr);

		addr = newaddr;
		count = count_needed;
	}

	/* Ownership of the array, and of each reference in it, passes to the caller. */
	*thing_list = (void **)addr;
	*countp = (mach_msg_type_number_t)count;

	return KERN_SUCCESS;
}
1509
1510 /*
1511 * processor_set_tasks:
1512 *
1513 * List all tasks in the processor set.
1514 */
1515 static kern_return_t
processor_set_tasks_internal(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count,mach_task_flavor_t flavor)1516 processor_set_tasks_internal(
1517 processor_set_t pset,
1518 task_array_t *task_list,
1519 mach_msg_type_number_t *count,
1520 mach_task_flavor_t flavor)
1521 {
1522 kern_return_t ret;
1523 mach_msg_type_number_t i;
1524
1525 ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
1526 if (ret != KERN_SUCCESS) {
1527 return ret;
1528 }
1529
1530 /* do the conversion that Mig should handle */
1531 switch (flavor) {
1532 case TASK_FLAVOR_CONTROL:
1533 for (i = 0; i < *count; i++) {
1534 if ((*task_list)[i] == current_task()) {
1535 /* if current_task(), return pinned port */
1536 (*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
1537 } else {
1538 (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
1539 }
1540 }
1541 break;
1542 case TASK_FLAVOR_READ:
1543 for (i = 0; i < *count; i++) {
1544 (*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
1545 }
1546 break;
1547 case TASK_FLAVOR_INSPECT:
1548 for (i = 0; i < *count; i++) {
1549 (*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
1550 }
1551 break;
1552 case TASK_FLAVOR_NAME:
1553 for (i = 0; i < *count; i++) {
1554 (*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
1555 }
1556 break;
1557 default:
1558 return KERN_INVALID_ARGUMENT;
1559 }
1560
1561 return KERN_SUCCESS;
1562 }
1563
1564 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1565 processor_set_tasks(
1566 processor_set_t pset,
1567 task_array_t *task_list,
1568 mach_msg_type_number_t *count)
1569 {
1570 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1571 }
1572
1573 /*
1574 * processor_set_tasks_with_flavor:
1575 *
1576 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1577 */
1578 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1579 processor_set_tasks_with_flavor(
1580 processor_set_t pset,
1581 mach_task_flavor_t flavor,
1582 task_array_t *task_list,
1583 mach_msg_type_number_t *count)
1584 {
1585 switch (flavor) {
1586 case TASK_FLAVOR_CONTROL:
1587 case TASK_FLAVOR_READ:
1588 case TASK_FLAVOR_INSPECT:
1589 case TASK_FLAVOR_NAME:
1590 return processor_set_tasks_internal(pset, task_list, count, flavor);
1591 default:
1592 return KERN_INVALID_ARGUMENT;
1593 }
1594 }
1595
1596 /*
1597 * processor_set_threads:
1598 *
1599 * List all threads in the processor set.
1600 */
1601 #if defined(SECURE_KERNEL)
1602 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1603 processor_set_threads(
1604 __unused processor_set_t pset,
1605 __unused thread_array_t *thread_list,
1606 __unused mach_msg_type_number_t *count)
1607 {
1608 return KERN_FAILURE;
1609 }
1610 #elif !defined(XNU_TARGET_OS_OSX)
1611 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1612 processor_set_threads(
1613 __unused processor_set_t pset,
1614 __unused thread_array_t *thread_list,
1615 __unused mach_msg_type_number_t *count)
1616 {
1617 return KERN_NOT_SUPPORTED;
1618 }
1619 #else
1620 kern_return_t
processor_set_threads(processor_set_t pset,thread_array_t * thread_list,mach_msg_type_number_t * count)1621 processor_set_threads(
1622 processor_set_t pset,
1623 thread_array_t *thread_list,
1624 mach_msg_type_number_t *count)
1625 {
1626 kern_return_t ret;
1627 mach_msg_type_number_t i;
1628
1629 ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1630 if (ret != KERN_SUCCESS) {
1631 return ret;
1632 }
1633
1634 /* do the conversion that Mig should handle */
1635 for (i = 0; i < *count; i++) {
1636 (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
1637 }
1638 return KERN_SUCCESS;
1639 }
1640 #endif
1641
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
	/* No thread context: recommend the efficiency cluster. */
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	/* Debug override: honor a task-wide pset hint when enabled. */
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	/* Debug override: force everything onto E-cores. */
	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* An explicit cluster binding wins over all policy checks below. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/*
	 * Background/utility priority bands are steered to E-cores unless
	 * the corresponding global perfctl policy says to follow the group.
	 */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise, follow the thread group's cluster recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel threads go E, others P. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else
	/* Non-AMP builds have a single SMP pset. */
	(void)thread;
	return PSET_SMP;
#endif
}
1699
#if CONFIG_THREAD_GROUPS && __AMP__

/*
 * Set whether a perfcontrol class follows its thread group's cluster
 * recommendation or is restricted to E-cores.
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy;

	if (inherit) {
		sched_policy = SCHED_PERFCTL_POLICY_FOLLOW_GROUP;
	} else {
		sched_policy = SCHED_PERFCTL_POLICY_RESTRICT_E;
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	switch (perfctl_class) {
	case PERFCONTROL_CLASS_UTILITY:
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
		break;
	case PERFCONTROL_CLASS_BACKGROUND:
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
		break;
	default:
		panic("perfctl_class invalid");
		break;
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */
1731