1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84
85 #include <security/mac_mach_internal.h>
86
87 #if defined(CONFIG_XNUPOST)
88
89 #include <tests/xnupost.h>
90
91 #endif /* CONFIG_XNUPOST */
92
93 /*
94 * Exported interface
95 */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98 #include <san/kcov.h>
99
100 /*
101 * The first pset and the pset_node are created by default for all platforms.
102 * Those typically represent the boot-cluster. For AMP platforms, all clusters
103 * of the same type are part of the same pset_node. This allows for easier
104 * CPU selection logic.
105 */
106 struct processor_set pset0;
107 struct pset_node pset_node0;
108
109 #if __AMP__
110 struct pset_node pset_node1;
111 pset_node_t ecore_node;
112 pset_node_t pcore_node;
113 #endif
114
115 LCK_SPIN_DECLARE(pset_node_lock, LCK_GRP_NULL);
116
117 LCK_GRP_DECLARE(pset_lck_grp, "pset");
118
119 queue_head_t tasks;
120 queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */
121 queue_head_t corpse_tasks;
122 int tasks_count;
123 int terminated_tasks_count;
124 queue_head_t threads;
125 queue_head_t terminated_threads;
126 int threads_count;
127 int terminated_threads_count;
128 LCK_GRP_DECLARE(task_lck_grp, "task");
129 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
130 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
131 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
132
133 processor_t processor_list;
134 unsigned int processor_count;
135 static processor_t processor_list_tail;
136 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
137
138 uint32_t processor_avail_count;
139 uint32_t processor_avail_count_user;
140 uint32_t primary_processor_avail_count;
141 uint32_t primary_processor_avail_count_user;
142
143 SECURITY_READ_ONLY_LATE(int) master_cpu = 0;
144
145 struct processor PERCPU_DATA(processor);
146 processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
147 processor_set_t pset_array[MAX_PSETS] = { 0 };
148
149 static timer_call_func_t running_timer_funcs[] = {
150 [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
151 [RUNNING_TIMER_KPERF] = kperf_timer_expire,
152 };
153 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
154 == RUNNING_TIMER_MAX, "missing running timer function");
155
156 #if defined(CONFIG_XNUPOST)
157 kern_return_t ipi_test(void);
158 extern void arm64_ipi_test(void);
159
160 kern_return_t
ipi_test()161 ipi_test()
162 {
163 #if __arm64__
164 processor_t p;
165
166 for (p = processor_list; p != NULL; p = p->processor_list) {
167 thread_bind(p);
168 thread_block(THREAD_CONTINUE_NULL);
169 kprintf("Running IPI test on cpu %d\n", p->cpu_id);
170 arm64_ipi_test();
171 }
172
173 /* unbind thread from specific cpu */
174 thread_bind(PROCESSOR_NULL);
175 thread_block(THREAD_CONTINUE_NULL);
176
177 T_PASS("Done running IPI tests");
178 #else
179 T_PASS("Unsupported platform. Not running IPI tests");
180
181 #endif /* __arm64__ */
182
183 return KERN_SUCCESS;
184 }
185 #endif /* defined(CONFIG_XNUPOST) */
186
187 int sched_enable_smt = 1;
188
void
processor_bootstrap(void)
{
	/* Initialize PSET node and PSET associated with boot cluster */
	pset_node0.psets = &pset0;
	pset_node0.pset_cluster_type = PSET_SMP;

#if __AMP__
	const ml_topology_info_t *topology_info = ml_get_topology_info();

	/*
	 * Since this is an AMP system, fill up cluster type and ID information; this should do the
	 * same kind of initialization done via ml_processor_register()
	 */
	ml_topology_cluster_t *boot_cluster = topology_info->boot_cluster;
	pset0.pset_id = boot_cluster->cluster_id;
	pset0.pset_cluster_id = boot_cluster->cluster_id;
	/*
	 * pset_node0 always represents the boot cluster's type; pset_node1 gets
	 * the opposite type so both E and P nodes exist regardless of which
	 * cluster type the system boots on.
	 */
	if (boot_cluster->cluster_type == CLUSTER_TYPE_E) {
		pset0.pset_cluster_type = PSET_AMP_E;
		pset_node0.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node1;
	} else {
		pset0.pset_cluster_type = PSET_AMP_P;
		pset_node0.pset_cluster_type = PSET_AMP_P;
		pcore_node = &pset_node0;

		pset_node1.pset_cluster_type = PSET_AMP_E;
		ecore_node = &pset_node1;
	}

	/* Link pset_node1 to pset_node0 */
	pset_node0.node_list = &pset_node1;
#endif

	/* Initialize the boot pset and the global task/thread bookkeeping queues. */
	pset_init(&pset0, &pset_node0);
	queue_init(&tasks);
	queue_init(&terminated_tasks);
	queue_init(&threads);
	queue_init(&terminated_threads);
	queue_init(&corpse_tasks);

	/* The boot cpu joins the boot pset. */
	processor_init(master_processor, master_cpu, &pset0);
}
235
236 /*
237 * Initialize the given processor for the cpu
238 * indicated by cpu_id, and assign to the
239 * specified processor set.
240 */
void
processor_init(
	processor_t processor,
	int cpu_id,
	processor_set_t pset)
{
	spl_t s;

	assert(cpu_id < MAX_SCHED_CPUS);
	processor->cpu_id = cpu_id;

	if (processor != master_processor) {
		/* Scheduler state for master_processor initialized in sched_init() */
		SCHED(processor_init)(processor);
	}

	/* A processor starts life offline, idle, and with no SMT sibling known. */
	processor->state = PROCESSOR_OFF_LINE;
	processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
	processor->processor_set = pset;
	processor_state_update_idle(processor);
	processor->starting_pri = MINPRI;
	processor->quantum_end = UINT64_MAX;
	processor->deadline = UINT64_MAX;
	processor->first_timeslice = FALSE;
	processor->processor_offlined = false;
	processor->processor_primary = processor; /* no SMT relationship known at this point */
	processor->processor_secondary = NULL;
	processor->is_SMT = false;
	processor->is_recommended = true;
	processor->processor_self = IP_NULL;
	processor->processor_list = NULL;
	processor->must_idle = false;
	processor->last_startup_reason = REASON_SYSTEM;
	processor->last_shutdown_reason = REASON_NONE;
	processor->shutdown_temporary = false;
	processor->shutdown_locked = false;
	processor->last_recommend_reason = REASON_SYSTEM;
	processor->last_derecommend_reason = REASON_NONE;
	/* Set up (but leave cleared) the per-cpu running timers. */
	processor->running_timers_active = false;
	for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
		timer_call_setup(&processor->running_timers[i],
		    running_timer_funcs[i], processor);
		running_timer_clear(processor, i);
	}
	recount_processor_init(processor);
	simple_lock_init(&processor->start_state_lock, 0);

	/* Publish this cpu in its pset's bitmaps, under the pset lock. */
	s = splsched();
	pset_lock(pset);
	bit_set(pset->cpu_bitmask, cpu_id);
	bit_set(pset->recommended_bitmask, cpu_id);
	bit_set(pset->primary_map, cpu_id);
	bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
	/* Maintain the pset's min/max cpu_id range for cpu selection. */
	if (pset->cpu_set_count++ == 0) {
		pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
	} else {
		pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
		pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
	}
	pset_unlock(pset);
	splx(s);

	/* Append to the global singly-linked processor list. */
	simple_lock(&processor_list_lock, LCK_GRP_NULL);
	if (processor_list == NULL) {
		processor_list = processor;
	} else {
		processor_list_tail->processor_list = processor;
	}
	processor_list_tail = processor;
	processor_count++;
	simple_unlock(&processor_list_lock);
	processor_array[cpu_id] = processor;
}
314
315 bool system_is_SMT = false;
316
/*
 * Record the SMT relationship between a (possibly secondary) processor
 * and its primary. If processor == primary, the cpu is itself a primary
 * and no sibling linkage is made.
 */
void
processor_set_primary(
	processor_t processor,
	processor_t primary)
{
	assert(processor->processor_primary == primary || processor->processor_primary == processor);
	/* Re-adjust primary point for this (possibly) secondary processor */
	processor->processor_primary = primary;

	assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
	if (primary != processor) {
		/* Link primary to secondary, assumes a 2-way SMT model
		 * We'll need to move to a queue if any future architecture
		 * requires otherwise.
		 */
		assert(processor->processor_secondary == NULL);
		primary->processor_secondary = processor;
		/* Mark both processors as SMT siblings */
		primary->is_SMT = TRUE;
		processor->is_SMT = TRUE;

		/* First SMT pair seen: switch the RT backup-processor count to the SMT default. */
		if (!system_is_SMT) {
			system_is_SMT = true;
			sched_rt_n_backup_processors = SCHED_DEFAULT_BACKUP_PROCESSORS_SMT;
		}

		/* A secondary is not a primary: drop it from the pset's primary map. */
		processor_set_t pset = processor->processor_set;
		spl_t s = splsched();
		pset_lock(pset);
		if (!pset->is_SMT) {
			pset->is_SMT = true;
		}
		bit_clear(pset->primary_map, processor->cpu_id);
		pset_unlock(pset);
		splx(s);
	}
}
354
355 processor_set_t
processor_pset(processor_t processor)356 processor_pset(
357 processor_t processor)
358 {
359 return processor->processor_set;
360 }
361
362 #if CONFIG_SCHED_EDGE
363
364 cluster_type_t
pset_type_for_id(uint32_t cluster_id)365 pset_type_for_id(uint32_t cluster_id)
366 {
367 return pset_array[cluster_id]->pset_type;
368 }
369
370 /*
371 * Processor foreign threads
372 *
373 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
374 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
375 * if its of a different type than its preferred cluster type (E/P). The bitmap should
376 * be updated every time a new thread is assigned to run on a processor. Cluster shared
377 * resource intensive threads are also not counted as foreign threads since these
378 * threads should not be rebalanced when running on non-preferred clusters.
379 *
380 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
381 * for rebalancing.
382 */
383 static void
processor_state_update_running_foreign(processor_t processor,thread_t thread)384 processor_state_update_running_foreign(processor_t processor, thread_t thread)
385 {
386 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
387 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
388
389 boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
390 boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
391 if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
392 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
393 } else {
394 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
395 }
396 }
397
398 /*
399 * Cluster shared resource intensive threads
400 *
401 * With the Edge scheduler, each pset maintains a bitmap of processors running
402 * threads that are shared resource intensive. This per-thread property is set
403 * by the performance controller or explicitly via dispatch SPIs. The bitmap
404 * allows the Edge scheduler to calculate the cluster shared resource load on
405 * any given cluster and load balance intensive threads accordingly.
406 */
407 static void
processor_state_update_running_cluster_shared_rsrc(processor_t processor,thread_t thread)408 processor_state_update_running_cluster_shared_rsrc(processor_t processor, thread_t thread)
409 {
410 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_RR)) {
411 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
412 } else {
413 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
414 }
415 if (thread_shared_rsrc_policy_get(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST)) {
416 bit_set(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
417 } else {
418 bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
419 }
420 }
421
422 #endif /* CONFIG_SCHED_EDGE */
423
/*
 * Reset the processor's "currently running" snapshot to idle defaults,
 * and refresh the owning pset's load average.
 */
void
processor_state_update_idle(processor_t processor)
{
	processor->current_pri = IDLEPRI;
	processor->current_sfi_class = SFI_CLASS_KERNEL;
	processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = NULL;
#endif
	processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
	processor->current_urgency = THREAD_URGENCY_NONE;
	processor->current_is_NO_SMT = false;
	processor->current_is_bound = false;
	processor->current_is_eagerpreempt = false;
#if CONFIG_SCHED_EDGE
	/* No runnable bucket on an idle cpu; clear shared-resource tracking too. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
	sched_update_pset_load_average(processor->processor_set, 0);
}
445
/*
 * Refresh the processor's "currently running" snapshot from the thread
 * going on-core. pset_lock_held indicates whether the caller already
 * holds the pset lock (required to update the pset load average).
 */
void
processor_state_update_from_thread(processor_t processor, thread_t thread, boolean_t pset_lock_held)
{
	processor->current_pri = thread->sched_pri;
	processor->current_sfi_class = thread->sfi_class;
	processor->current_recommended_pset_type = recommended_pset_type(thread);
#if CONFIG_SCHED_EDGE
	processor_state_update_running_foreign(processor, thread);
	processor_state_update_running_cluster_shared_rsrc(processor, thread);
	/* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
	sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
#endif /* CONFIG_SCHED_EDGE */

#if CONFIG_THREAD_GROUPS
	processor->current_thread_group = thread_group_get(thread);
#endif
	processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
	processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
	processor->current_is_NO_SMT = thread_no_smt(thread);
	processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
	processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
	if (pset_lock_held) {
		/* Only update the pset load average when the pset lock is held */
		sched_update_pset_load_average(processor->processor_set, 0);
	}
}
473
/*
 * Set the processor's "currently running" snapshot from explicitly
 * supplied values rather than from a thread structure.
 */
void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, __unused sched_bucket_t bucket)
{
	processor->current_pri = pri;
	processor->current_sfi_class = sfi_class;
	processor->current_recommended_pset_type = pset_type;
	processor->current_perfctl_class = perfctl_class;
	processor->current_urgency = urgency;
#if CONFIG_SCHED_EDGE
	/* Record the supplied bucket; clear shared-resource tracking for this cpu. */
	os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_RR], processor->cpu_id);
	bit_clear(processor->processor_set->cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST], processor->cpu_id);
#endif /* CONFIG_SCHED_EDGE */
}
489
/*
 * Return the root of the pset node hierarchy (the node covering the
 * boot cluster).
 */
pset_node_t
pset_node_root(void)
{
	return &pset_node0;
}
495
496 LCK_GRP_DECLARE(pset_create_grp, "pset_create");
497 LCK_MTX_DECLARE(pset_create_lock, &pset_create_grp);
498
499 processor_set_t
pset_create(pset_node_t node,pset_cluster_type_t pset_type,uint32_t pset_cluster_id,int pset_id)500 pset_create(
501 pset_node_t node,
502 pset_cluster_type_t pset_type,
503 uint32_t pset_cluster_id,
504 int pset_id)
505 {
506 /* some schedulers do not support multiple psets */
507 if (SCHED(multiple_psets_enabled) == FALSE) {
508 return processor_pset(master_processor);
509 }
510
511 processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
512
513 if (pset != PROCESSOR_SET_NULL) {
514 pset->pset_cluster_type = pset_type;
515 pset->pset_cluster_id = pset_cluster_id;
516 pset->pset_id = pset_id;
517 pset_init(pset, node);
518
519 lck_spin_lock(&pset_node_lock);
520
521 prev = &node->psets;
522 while (*prev != PROCESSOR_SET_NULL) {
523 prev = &(*prev)->pset_list;
524 }
525
526 *prev = pset;
527
528 lck_spin_unlock(&pset_node_lock);
529 }
530
531 return pset;
532 }
533
534 /*
535 * Find processor set with specified cluster_id.
536 * Returns default_pset if not found.
537 */
538 processor_set_t
pset_find(uint32_t cluster_id,processor_set_t default_pset)539 pset_find(
540 uint32_t cluster_id,
541 processor_set_t default_pset)
542 {
543 lck_spin_lock(&pset_node_lock);
544 pset_node_t node = &pset_node0;
545 processor_set_t pset = NULL;
546
547 do {
548 pset = node->psets;
549 while (pset != NULL) {
550 if (pset->pset_cluster_id == cluster_id) {
551 break;
552 }
553 pset = pset->pset_list;
554 }
555 } while (pset == NULL && (node = node->node_list) != NULL);
556 lck_spin_unlock(&pset_node_lock);
557 if (pset == NULL) {
558 return default_pset;
559 }
560 return pset;
561 }
562
563 /*
564 * Initialize the given processor_set structure.
565 */
void
pset_init(
	processor_set_t pset,
	pset_node_t node)
{
	/* No cpus are attached yet; counters and maps all start empty. */
	pset->online_processor_count = 0;
	pset->load_average = 0;
	bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
	pset->cpu_set_low = pset->cpu_set_hi = 0;
	pset->cpu_set_count = 0;
	pset->last_chosen = -1;
	pset->cpu_bitmask = 0;
	pset->recommended_bitmask = 0;
	pset->primary_map = 0;
	pset->realtime_map = 0;
	pset->cpu_available_map = 0;

	for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
		pset->cpu_state_map[i] = 0;
	}
	/* No ASTs or spills pending on a fresh pset. */
	pset->pending_AST_URGENT_cpu_mask = 0;
	pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	pset->pending_deferred_AST_cpu_mask = 0;
#endif
	pset->pending_spill_cpu_mask = 0;
	pset->rt_pending_spill_cpu_mask = 0;
	pset_lock_init(pset);
	pset->pset_self = IP_NULL;
	pset->pset_name_self = IP_NULL;
	pset->pset_list = PROCESSOR_SET_NULL;
	pset->is_SMT = false;
#if CONFIG_SCHED_EDGE
	/* Edge scheduler bookkeeping: execution time, foreign and shared-resource maps. */
	bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
	pset->cpu_running_foreign = 0;
	for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
		pset->cpu_running_cluster_shared_rsrc_thread[shared_rsrc_type] = 0;
		pset->pset_cluster_shared_rsrc_load[shared_rsrc_type] = 0;
	}
#endif /* CONFIG_SCHED_EDGE */
	pset->stealable_rt_threads_earliest_deadline = UINT64_MAX;

	if (pset != &pset0) {
		/*
		 * Scheduler runqueue initialization for non-boot psets.
		 * This initialization for pset0 happens in sched_init().
		 */
		SCHED(pset_init)(pset);
		SCHED(rt_init)(pset);
	}
	/* Publish the pset globally and attach it to its node, under the node lock. */
	pset_array[pset->pset_id] = pset;
	lck_spin_lock(&pset_node_lock);
	bit_set(node->pset_map, pset->pset_id);
	pset->node = node;
	lck_spin_unlock(&pset_node_lock);
}
622
623 kern_return_t
processor_info_count(processor_flavor_t flavor,mach_msg_type_number_t * count)624 processor_info_count(
625 processor_flavor_t flavor,
626 mach_msg_type_number_t *count)
627 {
628 switch (flavor) {
629 case PROCESSOR_BASIC_INFO:
630 *count = PROCESSOR_BASIC_INFO_COUNT;
631 break;
632
633 case PROCESSOR_CPU_LOAD_INFO:
634 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
635 break;
636
637 default:
638 return cpu_info_count(flavor, count);
639 }
640
641 return KERN_SUCCESS;
642 }
643
644 void
processor_cpu_load_info(processor_t processor,natural_t ticks[static CPU_STATE_MAX])645 processor_cpu_load_info(processor_t processor,
646 natural_t ticks[static CPU_STATE_MAX])
647 {
648 struct recount_usage usage = { 0 };
649 uint64_t idle_time = 0;
650 recount_processor_usage(&processor->pr_recount, &usage, &idle_time);
651
652 ticks[CPU_STATE_USER] += (uint32_t)(usage.ru_user_time_mach /
653 hz_tick_interval);
654 ticks[CPU_STATE_SYSTEM] += (uint32_t)(usage.ru_system_time_mach /
655 hz_tick_interval);
656 ticks[CPU_STATE_IDLE] += (uint32_t)(idle_time / hz_tick_interval);
657 }
658
/*
 * MIG handler: return information about a processor for the requested
 * flavor. Unknown flavors are deferred to the platform cpu layer.
 */
kern_return_t
processor_info(
	processor_t processor,
	processor_flavor_t flavor,
	host_t *host,
	processor_info_t info,
	mach_msg_type_number_t *count)
{
	int cpu_id, state;
	kern_return_t result;

	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cpu_id = processor->cpu_id;

	switch (flavor) {
	case PROCESSOR_BASIC_INFO:
	{
		processor_basic_info_t basic_info;

		/* Caller's buffer must be large enough for the flavor. */
		if (*count < PROCESSOR_BASIC_INFO_COUNT) {
			return KERN_FAILURE;
		}

		basic_info = (processor_basic_info_t) info;
		basic_info->cpu_type = slot_type(cpu_id);
		basic_info->cpu_subtype = slot_subtype(cpu_id);
		state = processor->state;
		/*
		 * A cpu is reported as not running when it is offline (or going
		 * offline) and not merely temporarily shut down; on x86_64 a
		 * derecommended cpu is also reported as not running.
		 */
		if (((state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) && !processor->shutdown_temporary)
#if defined(__x86_64__)
		    || !processor->is_recommended
#endif
		    ) {
			basic_info->running = FALSE;
		} else {
			basic_info->running = TRUE;
		}
		basic_info->slot_num = cpu_id;
		if (processor == master_processor) {
			basic_info->is_master = TRUE;
		} else {
			basic_info->is_master = FALSE;
		}

		*count = PROCESSOR_BASIC_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	case PROCESSOR_CPU_LOAD_INFO:
	{
		processor_cpu_load_info_t cpu_load_info;

		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
			return KERN_FAILURE;
		}

		cpu_load_info = (processor_cpu_load_info_t) info;

		/* processor_cpu_load_info() accumulates, so zero the ticks first. */
		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0;
		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
		processor_cpu_load_info(processor, cpu_load_info->cpu_ticks);
		/* NICE time is not tracked separately by the kernel. */
		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

		*count = PROCESSOR_CPU_LOAD_INFO_COUNT;
		*host = &realhost;

		return KERN_SUCCESS;
	}

	default:
		result = cpu_info(flavor, cpu_id, info, count);
		if (result == KERN_SUCCESS) {
			*host = &realhost;
		}

		return result;
	}
}
742
/*
 * Block until the given processor leaves PROCESSOR_START. Panics if the
 * cpu fails to start within the one-second wait timeout.
 */
void
processor_wait_for_start(processor_t processor)
{
	spl_t s = splsched();
	simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	while (processor->state == PROCESSOR_START) {
		/* Arm the wait before dropping the lock so a wakeup cannot be missed. */
		assert_wait_timeout((event_t)&processor->state, THREAD_UNINT, 1000, 1000 * NSEC_PER_USEC); /* 1 second */
		simple_unlock(&processor->start_state_lock);
		splx(s);

		wait_result_t wait_result = thread_block(THREAD_CONTINUE_NULL);
		if (wait_result == THREAD_TIMED_OUT) {
			panic("%s>cpu %d failed to start\n", __FUNCTION__, processor->cpu_id);
		}

		/* Re-acquire and re-check the state before deciding to wait again. */
		s = splsched();
		simple_lock(&processor->start_state_lock, LCK_GRP_NULL);
	}
	simple_unlock(&processor->start_state_lock);
	splx(s);
}
764
765 LCK_GRP_DECLARE(processor_updown_grp, "processor_updown");
766 LCK_MTX_DECLARE(processor_updown_lock, &processor_updown_grp);
767
/*
 * Bring a processor online for the given reason. Handles the special
 * master-processor path, SMT-disable policy, idle/startup thread
 * creation, and the platform cpu_start() handoff. LOCK_STATE /
 * UNLOCK_STATE flags are only valid with REASON_SYSTEM.
 */
static kern_return_t
processor_startup(
	processor_t processor,
	processor_reason_t reason,
	uint32_t flags)
{
	processor_set_t pset;
	thread_t thread;
	kern_return_t result;
	spl_t s;

	if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((flags & (LOCK_STATE | UNLOCK_STATE)) && (reason != REASON_SYSTEM)) {
		return KERN_INVALID_ARGUMENT;
	}

	lck_mtx_lock(&processor_updown_lock);

	if (processor == master_processor) {
		processor_t prev;

		processor->last_startup_reason = reason;

		ml_cpu_power_enable(processor->cpu_id);

		/* Run cpu_start() for the master cpu from the master cpu itself. */
		prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		result = cpu_start(processor->cpu_id);

		thread_bind(prev);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}

	bool scheduler_disable = false;

	if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
		if (cpu_can_exit(processor->cpu_id)) {
			lck_mtx_unlock(&processor_updown_lock);
			return KERN_SUCCESS;
		}
		/*
		 * This secondary SMT processor must start in order to service interrupts,
		 * so instead it will be disabled at the scheduler level.
		 */
		scheduler_disable = true;
	}

	s = splsched();
	pset = processor->processor_set;
	pset_lock(pset);
	if (flags & LOCK_STATE) {
		processor->shutdown_locked = true;
	} else if (flags & UNLOCK_STATE) {
		processor->shutdown_locked = false;
	}

	/* Another startup is already in flight: just wait for it to complete. */
	if (processor->state == PROCESSOR_START) {
		pset_unlock(pset);
		splx(s);

		processor_wait_for_start(processor);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_SUCCESS;
	}

	if ((processor->state != PROCESSOR_OFF_LINE) || ((flags & SHUTDOWN_TEMPORARY) && !processor->shutdown_temporary)) {
		pset_unlock(pset);
		splx(s);

		lck_mtx_unlock(&processor_updown_lock);
		return KERN_FAILURE;
	}

	pset_update_processor_state(pset, processor, PROCESSOR_START);
	processor->last_startup_reason = reason;
	pset_unlock(pset);
	splx(s);

	/*
	 * Create the idle processor thread.
	 */
	if (processor->idle_thread == THREAD_NULL) {
		result = idle_thread_create(processor);
		if (result != KERN_SUCCESS) {
			/* Roll the state back to OFF_LINE on failure. */
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}
	}

	/*
	 * If there is no active thread, the processor
	 * has never been started. Create a dedicated
	 * start up thread.
	 */
	if (processor->active_thread == THREAD_NULL &&
	    processor->startup_thread == THREAD_NULL) {
		result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
		if (result != KERN_SUCCESS) {
			s = splsched();
			pset_lock(pset);
			pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
			pset_unlock(pset);
			splx(s);

			lck_mtx_unlock(&processor_updown_lock);
			return result;
		}

		/* Bind the startup thread to this cpu and mark it runnable. */
		s = splsched();
		thread_lock(thread);
		thread->bound_processor = processor;
		processor->startup_thread = thread;
		thread->state = TH_RUN;
		thread->last_made_runnable_time = thread->last_basepri_change_time = mach_absolute_time();
		thread_unlock(thread);
		splx(s);

		thread_deallocate(thread);
	}

	if (processor->processor_self == IP_NULL) {
		ipc_processor_init(processor);
	}

	/* Power up the cpu and hand off to the platform start path. */
	ml_cpu_power_enable(processor->cpu_id);
	ml_cpu_begin_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
	result = cpu_start(processor->cpu_id);
#if defined (__arm__) || defined (__arm64__)
	assert(result == KERN_SUCCESS);
#else
	if (result != KERN_SUCCESS) {
		s = splsched();
		pset_lock(pset);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		pset_unlock(pset);
		splx(s);
		ml_cpu_end_state_transition(processor->cpu_id);

		lck_mtx_unlock(&processor_updown_lock);
		return result;
	}
#endif
	if (scheduler_disable) {
		assert(processor->processor_primary != processor);
		sched_processor_enable(processor, FALSE);
	}

	if (flags & WAIT_FOR_START) {
		processor_wait_for_start(processor);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
	ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

#if CONFIG_KCOV
	kcov_start_cpu(processor->cpu_id);
#endif

	lck_mtx_unlock(&processor_updown_lock);
	return KERN_SUCCESS;
}
943
/*
 * Take a processor offline for the given reason. User requests on cpus
 * that cannot fully power off are serviced by disabling the cpu at the
 * scheduler level instead of a full shutdown.
 */
kern_return_t
processor_exit_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
{
	if (processor == PROCESSOR_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* While sleep is in progress, only system-initiated exits are allowed. */
	if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
#ifdef RHODES_CLUSTER_POWERDOWN_WORKAROUND
		/*
		 * Must allow CLPC to finish powering down the whole cluster,
		 * or IOCPUSleepKernel() will fail to restart the offline cpus.
		 */
		if (reason != REASON_CLPC_SYSTEM) {
			return KERN_FAILURE;
		}
#else
		return KERN_FAILURE;
#endif
	}

	if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
		return sched_processor_enable(processor, FALSE);
	} else if ((reason == REASON_SYSTEM) || cpu_can_exit(processor->cpu_id)) {
		return processor_shutdown(processor, reason, flags);
	}

	return KERN_INVALID_ARGUMENT;
}
973
974 kern_return_t
processor_exit(processor_t processor)975 processor_exit(
976 processor_t processor)
977 {
978 return processor_exit_reason(processor, REASON_SYSTEM, 0);
979 }
980
981 kern_return_t
processor_exit_from_user(processor_t processor)982 processor_exit_from_user(
983 processor_t processor)
984 {
985 return processor_exit_reason(processor, REASON_USER, 0);
986 }
987
988 kern_return_t
processor_start_reason(processor_t processor,processor_reason_t reason,uint32_t flags)989 processor_start_reason(processor_t processor, processor_reason_t reason, uint32_t flags)
990 {
991 if (processor == PROCESSOR_NULL) {
992 return KERN_INVALID_ARGUMENT;
993 }
994
995 if (sched_is_in_sleep() && (reason != REASON_SYSTEM)) {
996 return KERN_FAILURE;
997 }
998
999 if ((reason == REASON_USER) && !cpu_can_exit(processor->cpu_id)) {
1000 return sched_processor_enable(processor, TRUE);
1001 } else {
1002 return processor_startup(processor, reason, flags);
1003 }
1004 }
1005
1006 kern_return_t
processor_start(processor_t processor)1007 processor_start(
1008 processor_t processor)
1009 {
1010 return processor_start_reason(processor, REASON_SYSTEM, 0);
1011 }
1012
1013 kern_return_t
processor_start_from_user(processor_t processor)1014 processor_start_from_user(
1015 processor_t processor)
1016 {
1017 return processor_start_reason(processor, REASON_USER, 0);
1018 }
1019
1020 kern_return_t
enable_smt_processors(bool enable)1021 enable_smt_processors(bool enable)
1022 {
1023 if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
1024 /* Not an SMT system */
1025 return KERN_INVALID_ARGUMENT;
1026 }
1027
1028 int ncpus = machine_info.logical_cpu_max;
1029
1030 for (int i = 1; i < ncpus; i++) {
1031 processor_t processor = processor_array[i];
1032
1033 if (processor->processor_primary != processor) {
1034 if (enable) {
1035 processor_start_from_user(processor);
1036 } else { /* Disable */
1037 processor_exit_from_user(processor);
1038 }
1039 }
1040 }
1041
1042 #define BSD_HOST 1
1043 host_basic_info_data_t hinfo;
1044 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
1045 kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
1046 if (kret != KERN_SUCCESS) {
1047 return kret;
1048 }
1049
1050 if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
1051 return KERN_FAILURE;
1052 }
1053
1054 if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
1055 return KERN_FAILURE;
1056 }
1057
1058 return KERN_SUCCESS;
1059 }
1060
1061 bool
processor_should_kprintf(processor_t processor,bool starting)1062 processor_should_kprintf(processor_t processor, bool starting)
1063 {
1064 processor_reason_t reason = starting ? processor->last_startup_reason : processor->last_shutdown_reason;
1065
1066 return reason != REASON_CLPC_SYSTEM;
1067 }
1068
1069 kern_return_t
processor_control(processor_t processor,processor_info_t info,mach_msg_type_number_t count)1070 processor_control(
1071 processor_t processor,
1072 processor_info_t info,
1073 mach_msg_type_number_t count)
1074 {
1075 if (processor == PROCESSOR_NULL) {
1076 return KERN_INVALID_ARGUMENT;
1077 }
1078
1079 return cpu_control(processor->cpu_id, info, count);
1080 }
1081
1082 kern_return_t
processor_get_assignment(processor_t processor,processor_set_t * pset)1083 processor_get_assignment(
1084 processor_t processor,
1085 processor_set_t *pset)
1086 {
1087 int state;
1088
1089 if (processor == PROCESSOR_NULL) {
1090 return KERN_INVALID_ARGUMENT;
1091 }
1092
1093 state = processor->state;
1094 if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE || state == PROCESSOR_PENDING_OFFLINE) {
1095 return KERN_FAILURE;
1096 }
1097
1098 *pset = &pset0;
1099
1100 return KERN_SUCCESS;
1101 }
1102
1103 kern_return_t
processor_set_info(processor_set_t pset,int flavor,host_t * host,processor_set_info_t info,mach_msg_type_number_t * count)1104 processor_set_info(
1105 processor_set_t pset,
1106 int flavor,
1107 host_t *host,
1108 processor_set_info_t info,
1109 mach_msg_type_number_t *count)
1110 {
1111 if (pset == PROCESSOR_SET_NULL) {
1112 return KERN_INVALID_ARGUMENT;
1113 }
1114
1115 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1116 processor_set_basic_info_t basic_info;
1117
1118 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1119 return KERN_FAILURE;
1120 }
1121
1122 basic_info = (processor_set_basic_info_t) info;
1123 #if defined(__x86_64__)
1124 basic_info->processor_count = processor_avail_count_user;
1125 #else
1126 basic_info->processor_count = processor_avail_count;
1127 #endif
1128 basic_info->default_policy = POLICY_TIMESHARE;
1129
1130 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1131 *host = &realhost;
1132 return KERN_SUCCESS;
1133 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1134 policy_timeshare_base_t ts_base;
1135
1136 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1137 return KERN_FAILURE;
1138 }
1139
1140 ts_base = (policy_timeshare_base_t) info;
1141 ts_base->base_priority = BASEPRI_DEFAULT;
1142
1143 *count = POLICY_TIMESHARE_BASE_COUNT;
1144 *host = &realhost;
1145 return KERN_SUCCESS;
1146 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1147 policy_fifo_base_t fifo_base;
1148
1149 if (*count < POLICY_FIFO_BASE_COUNT) {
1150 return KERN_FAILURE;
1151 }
1152
1153 fifo_base = (policy_fifo_base_t) info;
1154 fifo_base->base_priority = BASEPRI_DEFAULT;
1155
1156 *count = POLICY_FIFO_BASE_COUNT;
1157 *host = &realhost;
1158 return KERN_SUCCESS;
1159 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1160 policy_rr_base_t rr_base;
1161
1162 if (*count < POLICY_RR_BASE_COUNT) {
1163 return KERN_FAILURE;
1164 }
1165
1166 rr_base = (policy_rr_base_t) info;
1167 rr_base->base_priority = BASEPRI_DEFAULT;
1168 rr_base->quantum = 1;
1169
1170 *count = POLICY_RR_BASE_COUNT;
1171 *host = &realhost;
1172 return KERN_SUCCESS;
1173 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1174 policy_timeshare_limit_t ts_limit;
1175
1176 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1177 return KERN_FAILURE;
1178 }
1179
1180 ts_limit = (policy_timeshare_limit_t) info;
1181 ts_limit->max_priority = MAXPRI_KERNEL;
1182
1183 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1184 *host = &realhost;
1185 return KERN_SUCCESS;
1186 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1187 policy_fifo_limit_t fifo_limit;
1188
1189 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1190 return KERN_FAILURE;
1191 }
1192
1193 fifo_limit = (policy_fifo_limit_t) info;
1194 fifo_limit->max_priority = MAXPRI_KERNEL;
1195
1196 *count = POLICY_FIFO_LIMIT_COUNT;
1197 *host = &realhost;
1198 return KERN_SUCCESS;
1199 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1200 policy_rr_limit_t rr_limit;
1201
1202 if (*count < POLICY_RR_LIMIT_COUNT) {
1203 return KERN_FAILURE;
1204 }
1205
1206 rr_limit = (policy_rr_limit_t) info;
1207 rr_limit->max_priority = MAXPRI_KERNEL;
1208
1209 *count = POLICY_RR_LIMIT_COUNT;
1210 *host = &realhost;
1211 return KERN_SUCCESS;
1212 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1213 int *enabled;
1214
1215 if (*count < (sizeof(*enabled) / sizeof(int))) {
1216 return KERN_FAILURE;
1217 }
1218
1219 enabled = (int *) info;
1220 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1221
1222 *count = sizeof(*enabled) / sizeof(int);
1223 *host = &realhost;
1224 return KERN_SUCCESS;
1225 }
1226
1227
1228 *host = HOST_NULL;
1229 return KERN_INVALID_ARGUMENT;
1230 }
1231
1232 /*
1233 * processor_set_statistics
1234 *
1235 * Returns scheduling statistics for a processor set.
1236 */
1237 kern_return_t
processor_set_statistics(processor_set_t pset,int flavor,processor_set_info_t info,mach_msg_type_number_t * count)1238 processor_set_statistics(
1239 processor_set_t pset,
1240 int flavor,
1241 processor_set_info_t info,
1242 mach_msg_type_number_t *count)
1243 {
1244 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1245 return KERN_INVALID_PROCESSOR_SET;
1246 }
1247
1248 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1249 processor_set_load_info_t load_info;
1250
1251 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1252 return KERN_FAILURE;
1253 }
1254
1255 load_info = (processor_set_load_info_t) info;
1256
1257 load_info->mach_factor = sched_mach_factor;
1258 load_info->load_average = sched_load_average;
1259
1260 load_info->task_count = tasks_count;
1261 load_info->thread_count = threads_count;
1262
1263 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1264 return KERN_SUCCESS;
1265 }
1266
1267 return KERN_INVALID_ARGUMENT;
1268 }
1269
1270 /*
1271 * processor_set_things:
1272 *
1273 * Common internals for processor_set_{threads,tasks}
1274 */
1275 static kern_return_t
processor_set_things(processor_set_t pset,void ** thing_list,mach_msg_type_number_t * countp,int type,mach_task_flavor_t flavor)1276 processor_set_things(
1277 processor_set_t pset,
1278 void **thing_list,
1279 mach_msg_type_number_t *countp,
1280 int type,
1281 mach_task_flavor_t flavor)
1282 {
1283 unsigned int i;
1284 task_t task;
1285 thread_t thread;
1286
1287 task_t *task_list;
1288 vm_size_t actual_tasks, task_count_cur, task_count_needed;
1289
1290 thread_t *thread_list;
1291 vm_size_t actual_threads, thread_count_cur, thread_count_needed;
1292
1293 void *addr, *newaddr;
1294 vm_size_t count, count_needed;
1295
1296 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1297 return KERN_INVALID_ARGUMENT;
1298 }
1299
1300 task_count_cur = 0;
1301 task_count_needed = 0;
1302 task_list = NULL;
1303 actual_tasks = 0;
1304
1305 thread_count_cur = 0;
1306 thread_count_needed = 0;
1307 thread_list = NULL;
1308 actual_threads = 0;
1309
1310 for (;;) {
1311 lck_mtx_lock(&tasks_threads_lock);
1312
1313 /* do we have the memory we need? */
1314 if (type == PSET_THING_THREAD) {
1315 thread_count_needed = threads_count;
1316 }
1317 #if !CONFIG_MACF
1318 else
1319 #endif
1320 task_count_needed = tasks_count;
1321
1322 if (task_count_needed <= task_count_cur &&
1323 thread_count_needed <= thread_count_cur) {
1324 break;
1325 }
1326
1327 /* unlock and allocate more memory */
1328 lck_mtx_unlock(&tasks_threads_lock);
1329
1330 /* grow task array */
1331 if (task_count_needed > task_count_cur) {
1332 kfree_type(task_t, task_count_cur, task_list);
1333 assert(task_count_needed > 0);
1334 task_count_cur = task_count_needed;
1335
1336 task_list = kalloc_type(task_t, task_count_cur, Z_WAITOK | Z_ZERO);
1337 if (task_list == NULL) {
1338 kfree_type(thread_t, thread_count_cur, thread_list);
1339 return KERN_RESOURCE_SHORTAGE;
1340 }
1341 }
1342
1343 /* grow thread array */
1344 if (thread_count_needed > thread_count_cur) {
1345 kfree_type(thread_t, thread_count_cur, thread_list);
1346
1347 assert(thread_count_needed > 0);
1348 thread_count_cur = thread_count_needed;
1349
1350 thread_list = kalloc_type(thread_t, thread_count_cur, Z_WAITOK | Z_ZERO);
1351 if (thread_list == NULL) {
1352 kfree_type(task_t, task_count_cur, task_list);
1353 return KERN_RESOURCE_SHORTAGE;
1354 }
1355 }
1356 }
1357
1358 /* OK, have memory and the list locked */
1359
1360 /* If we need it, get the thread list */
1361 if (type == PSET_THING_THREAD) {
1362 queue_iterate(&threads, thread, thread_t, threads) {
1363 task = get_threadtask(thread);
1364 #if defined(SECURE_KERNEL)
1365 if (task == kernel_task) {
1366 /* skip threads belonging to kernel_task */
1367 continue;
1368 }
1369 #endif
1370 if (!task->ipc_active || task_is_exec_copy(task)) {
1371 /* skip threads in inactive tasks (in the middle of exec/fork/spawn) */
1372 continue;
1373 }
1374
1375 thread_reference(thread);
1376 thread_list[actual_threads++] = thread;
1377 }
1378 }
1379 #if !CONFIG_MACF
1380 else
1381 #endif
1382 {
1383 /* get a list of the tasks */
1384 queue_iterate(&tasks, task, task_t, tasks) {
1385 #if defined(SECURE_KERNEL)
1386 if (task == kernel_task) {
1387 /* skip kernel_task */
1388 continue;
1389 }
1390 #endif
1391 if (!task->ipc_active || task_is_exec_copy(task)) {
1392 /* skip inactive tasks (in the middle of exec/fork/spawn) */
1393 continue;
1394 }
1395
1396 task_reference(task);
1397 task_list[actual_tasks++] = task;
1398 }
1399 }
1400
1401 lck_mtx_unlock(&tasks_threads_lock);
1402
1403 #if CONFIG_MACF
1404 unsigned int j, used;
1405
1406 /* for each task, make sure we are allowed to examine it */
1407 for (i = used = 0; i < actual_tasks; i++) {
1408 if (mac_task_check_expose_task(task_list[i], flavor)) {
1409 task_deallocate(task_list[i]);
1410 continue;
1411 }
1412 task_list[used++] = task_list[i];
1413 }
1414 actual_tasks = used;
1415 task_count_needed = actual_tasks;
1416
1417 if (type == PSET_THING_THREAD) {
1418 /* for each thread (if any), make sure it's task is in the allowed list */
1419 for (i = used = 0; i < actual_threads; i++) {
1420 boolean_t found_task = FALSE;
1421
1422 task = get_threadtask(thread_list[i]);
1423 for (j = 0; j < actual_tasks; j++) {
1424 if (task_list[j] == task) {
1425 found_task = TRUE;
1426 break;
1427 }
1428 }
1429 if (found_task) {
1430 thread_list[used++] = thread_list[i];
1431 } else {
1432 thread_deallocate(thread_list[i]);
1433 }
1434 }
1435 actual_threads = used;
1436 thread_count_needed = actual_threads;
1437
1438 /* done with the task list */
1439 for (i = 0; i < actual_tasks; i++) {
1440 task_deallocate(task_list[i]);
1441 }
1442 kfree_type(task_t, task_count_cur, task_list);
1443 task_count_cur = 0;
1444 actual_tasks = 0;
1445 task_list = NULL;
1446 }
1447 #endif
1448
1449 if (type == PSET_THING_THREAD) {
1450 if (actual_threads == 0) {
1451 /* no threads available to return */
1452 assert(task_count_cur == 0);
1453 kfree_type(thread_t, thread_count_cur, thread_list);
1454 *thing_list = NULL;
1455 *countp = 0;
1456 return KERN_SUCCESS;
1457 }
1458 count_needed = actual_threads;
1459 count = thread_count_cur;
1460 addr = thread_list;
1461 } else {
1462 if (actual_tasks == 0) {
1463 /* no tasks available to return */
1464 assert(thread_count_cur == 0);
1465 kfree_type(task_t, task_count_cur, task_list);
1466 *thing_list = NULL;
1467 *countp = 0;
1468 return KERN_SUCCESS;
1469 }
1470 count_needed = actual_tasks;
1471 count = task_count_cur;
1472 addr = task_list;
1473 }
1474
1475 /* if we allocated too much, must copy */
1476 if (count_needed < count) {
1477 newaddr = kalloc_type(void *, count_needed, Z_WAITOK | Z_ZERO);
1478 if (newaddr == 0) {
1479 for (i = 0; i < actual_tasks; i++) {
1480 if (type == PSET_THING_THREAD) {
1481 thread_deallocate(thread_list[i]);
1482 } else {
1483 task_deallocate(task_list[i]);
1484 }
1485 }
1486 kfree_type(void *, count, addr);
1487 return KERN_RESOURCE_SHORTAGE;
1488 }
1489
1490 bcopy(addr, newaddr, count_needed * sizeof(void *));
1491 kfree_type(void *, count, addr);
1492
1493 addr = newaddr;
1494 count = count_needed;
1495 }
1496
1497 *thing_list = (void **)addr;
1498 *countp = (mach_msg_type_number_t)count;
1499
1500 return KERN_SUCCESS;
1501 }
1502
1503 /*
1504 * processor_set_tasks:
1505 *
1506 * List all tasks in the processor set.
1507 */
/*
 * Gather referenced tasks via processor_set_things(), then convert each
 * task reference in place into a send right of the requested flavor.
 * On success the array holds ports (cast back to task_t for the array
 * type); each conversion consumes the task reference taken above.
 */
static kern_return_t
processor_set_tasks_internal(
	processor_set_t pset,
	task_array_t *task_list,
	mach_msg_type_number_t *count,
	mach_task_flavor_t flavor)
{
	kern_return_t ret;
	mach_msg_type_number_t i;

	ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	/* do the conversion that Mig should handle */
	switch (flavor) {
	case TASK_FLAVOR_CONTROL:
		for (i = 0; i < *count; i++) {
			if ((*task_list)[i] == current_task()) {
				/* if current_task(), return pinned port */
				(*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
			} else {
				(*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
			}
		}
		break;
	case TASK_FLAVOR_READ:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
		}
		break;
	case TASK_FLAVOR_INSPECT:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
		}
		break;
	case TASK_FLAVOR_NAME:
		for (i = 0; i < *count; i++) {
			(*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
		}
		break;
	default:
		/*
		 * NOTE(review): unreachable today — both callers validate
		 * flavor first.  If it were reached, the references and the
		 * array from processor_set_things() would be leaked; verify
		 * before adding new callers with unvalidated flavors.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	return KERN_SUCCESS;
}
1556
1557 kern_return_t
processor_set_tasks(processor_set_t pset,task_array_t * task_list,mach_msg_type_number_t * count)1558 processor_set_tasks(
1559 processor_set_t pset,
1560 task_array_t *task_list,
1561 mach_msg_type_number_t *count)
1562 {
1563 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1564 }
1565
1566 /*
1567 * processor_set_tasks_with_flavor:
1568 *
1569 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1570 */
1571 kern_return_t
processor_set_tasks_with_flavor(processor_set_t pset,mach_task_flavor_t flavor,task_array_t * task_list,mach_msg_type_number_t * count)1572 processor_set_tasks_with_flavor(
1573 processor_set_t pset,
1574 mach_task_flavor_t flavor,
1575 task_array_t *task_list,
1576 mach_msg_type_number_t *count)
1577 {
1578 switch (flavor) {
1579 case TASK_FLAVOR_CONTROL:
1580 case TASK_FLAVOR_READ:
1581 case TASK_FLAVOR_INSPECT:
1582 case TASK_FLAVOR_NAME:
1583 return processor_set_tasks_internal(pset, task_list, count, flavor);
1584 default:
1585 return KERN_INVALID_ARGUMENT;
1586 }
1587 }
1588
1589 /*
1590 * processor_set_threads:
1591 *
1592 * List all threads in the processor set.
1593 */
1594 #if defined(SECURE_KERNEL)
1595 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1596 processor_set_threads(
1597 __unused processor_set_t pset,
1598 __unused thread_array_t *thread_list,
1599 __unused mach_msg_type_number_t *count)
1600 {
1601 return KERN_FAILURE;
1602 }
1603 #elif !defined(XNU_TARGET_OS_OSX)
1604 kern_return_t
processor_set_threads(__unused processor_set_t pset,__unused thread_array_t * thread_list,__unused mach_msg_type_number_t * count)1605 processor_set_threads(
1606 __unused processor_set_t pset,
1607 __unused thread_array_t *thread_list,
1608 __unused mach_msg_type_number_t *count)
1609 {
1610 return KERN_NOT_SUPPORTED;
1611 }
1612 #else
1613 kern_return_t
processor_set_threads(processor_set_t pset,thread_array_t * thread_list,mach_msg_type_number_t * count)1614 processor_set_threads(
1615 processor_set_t pset,
1616 thread_array_t *thread_list,
1617 mach_msg_type_number_t *count)
1618 {
1619 kern_return_t ret;
1620 mach_msg_type_number_t i;
1621
1622 ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1623 if (ret != KERN_SUCCESS) {
1624 return ret;
1625 }
1626
1627 /* do the conversion that Mig should handle */
1628 for (i = 0; i < *count; i++) {
1629 (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
1630 }
1631 return KERN_SUCCESS;
1632 }
1633 #endif
1634
/*
 * Pick the cluster type (E or P pset on AMP systems) a thread should
 * run on, combining per-task hints, cluster binding, base priority,
 * and the thread group's recommendation.  Non-AMP builds always
 * return PSET_SMP.
 */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
	if (thread == THREAD_NULL) {
		return PSET_AMP_E;
	}

#if DEVELOPMENT || DEBUG
	/* Debug override: honor an explicit per-task pset-hint cluster type. */
	extern bool system_ecore_only;
	extern int enable_task_set_cluster_type;
	task_t task = get_threadtask(thread);
	if (enable_task_set_cluster_type && (task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE)) {
		processor_set_t pset_hint = task->pset_hint;
		if (pset_hint) {
			return pset_hint->pset_cluster_type;
		}
	}

	/* Debug override: force everything onto E-cores. */
	if (system_ecore_only) {
		return PSET_AMP_E;
	}
#endif

	/* Explicit cluster binding wins over policy. */
	if (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE) {
		return pset_array[thread->th_bound_cluster_id]->pset_cluster_type;
	}

	/*
	 * Low-priority bands go to E-cores unless the global perfctl policy
	 * for that band says to follow the thread group instead.
	 */
	if (thread->base_pri <= MAXPRI_THROTTLE) {
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	} else if (thread->base_pri <= BASEPRI_UTILITY) {
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
			return PSET_AMP_E;
		}
	}

	/* Otherwise defer to the thread group's recommendation. */
	struct thread_group *tg = thread_group_get(thread);
	cluster_type_t recommendation = thread_group_recommendation(tg);
	switch (recommendation) {
	case CLUSTER_TYPE_SMP:
	default:
		/* No specific recommendation: kernel work stays on E-cores. */
		if (get_threadtask(thread) == kernel_task) {
			return PSET_AMP_E;
		}
		return PSET_AMP_P;
	case CLUSTER_TYPE_E:
		return PSET_AMP_E;
	case CLUSTER_TYPE_P:
		return PSET_AMP_P;
	}
#else
	(void)thread;
	return PSET_SMP;
#endif
}
1692
#if CONFIG_THREAD_GROUPS && __AMP__

/*
 * CLPC entry point: set whether threads of a perfcontrol class follow
 * their thread group's cluster recommendation (inherit) or are
 * restricted to E-cores.  Stores into the global per-class policy
 * variables read by recommended_pset_type().
 */
void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
	sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

	/* Trace the policy change before applying it. */
	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

	switch (perfctl_class) {
	case PERFCONTROL_CLASS_UTILITY:
		os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
		break;
	case PERFCONTROL_CLASS_BACKGROUND:
		os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
		break;
	default:
		/* Only the utility and background classes carry this policy. */
		panic("perfctl_class invalid");
		break;
	}
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */
1724