1 /*
2 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm/machine_cpu.h>
30 #include <arm/cpu_internal.h>
31 #include <arm/cpuid.h>
32 #include <arm/cpuid_internal.h>
33 #include <arm/cpu_data.h>
34 #include <arm/cpu_data_internal.h>
35 #include <arm/misc_protos.h>
36 #include <arm/machdep_call.h>
37 #include <arm/machine_routines.h>
38 #include <arm/rtclock.h>
39 #include <kern/machine.h>
40 #include <kern/thread.h>
41 #include <kern/thread_group.h>
42 #include <kern/policy_internal.h>
43 #include <kern/sched_hygiene.h>
44 #include <kern/startup.h>
45 #include <kern/monotonic.h>
46 #include <kern/timeout.h>
47 #include <machine/config.h>
48 #include <machine/atomic.h>
49 #include <machine/monotonic.h>
50 #include <pexpert/pexpert.h>
51 #include <pexpert/device_tree.h>
52 #include <pexpert/arm64/apple_arm64_cpu.h>
53
54 #include <mach/machine.h>
55 #include <mach/machine/sdt.h>
56
57 #if !HAS_CONTINUOUS_HWCLOCK
58 extern uint64_t mach_absolutetime_asleep;
59 #else
60 extern uint64_t wake_abstime;
61 static uint64_t wake_conttime = UINT64_MAX;
62 #endif
63
64 extern volatile uint32_t debug_enabled;
65 extern _Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES];
66 const char *cluster_type_names[MAX_CPU_TYPES] = {
67 [CLUSTER_TYPE_SMP] = "Standard",
68 [CLUSTER_TYPE_E] = "Efficiency",
69 [CLUSTER_TYPE_P] = "Performance",
70 };
71
72 static int max_cpus_initialized = 0;
73 #define MAX_CPUS_SET 0x1
74 #define MAX_CPUS_WAIT 0x2
75
76 LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
77 LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
78 uint32_t lockdown_done = 0;
79 boolean_t is_clock_configured = FALSE;
80
81 static void
sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused,going_on_core_t on __unused)82 sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused, going_on_core_t on __unused)
83 {
84 }
85
86 static void
sched_perfcontrol_switch_default(perfcontrol_state_t old_thread_state __unused,perfcontrol_state_t new_thread_state __unused)87 sched_perfcontrol_switch_default(perfcontrol_state_t old_thread_state __unused, perfcontrol_state_t new_thread_state __unused)
88 {
89 }
90
91 static void
sched_perfcontrol_offcore_default(perfcontrol_state_t old_thread_state __unused,going_off_core_t off __unused,boolean_t thread_terminating __unused)92 sched_perfcontrol_offcore_default(perfcontrol_state_t old_thread_state __unused, going_off_core_t off __unused, boolean_t thread_terminating __unused)
93 {
94 }
95
96 static void
sched_perfcontrol_thread_group_default(thread_group_data_t data __unused)97 sched_perfcontrol_thread_group_default(thread_group_data_t data __unused)
98 {
99 }
100
101 static void
sched_perfcontrol_max_runnable_latency_default(perfcontrol_max_runnable_latency_t latencies __unused)102 sched_perfcontrol_max_runnable_latency_default(perfcontrol_max_runnable_latency_t latencies __unused)
103 {
104 }
105
106 static void
sched_perfcontrol_work_interval_notify_default(perfcontrol_state_t thread_state __unused,perfcontrol_work_interval_t work_interval __unused)107 sched_perfcontrol_work_interval_notify_default(perfcontrol_state_t thread_state __unused,
108 perfcontrol_work_interval_t work_interval __unused)
109 {
110 }
111
112 static void
sched_perfcontrol_work_interval_ctl_default(perfcontrol_state_t thread_state __unused,perfcontrol_work_interval_instance_t instance __unused)113 sched_perfcontrol_work_interval_ctl_default(perfcontrol_state_t thread_state __unused,
114 perfcontrol_work_interval_instance_t instance __unused)
115 {
116 }
117
118 static void
sched_perfcontrol_deadline_passed_default(__unused uint64_t deadline)119 sched_perfcontrol_deadline_passed_default(__unused uint64_t deadline)
120 {
121 }
122
123 static void
sched_perfcontrol_csw_default(__unused perfcontrol_event event,__unused uint32_t cpu_id,__unused uint64_t timestamp,__unused uint32_t flags,__unused struct perfcontrol_thread_data * offcore,__unused struct perfcontrol_thread_data * oncore,__unused struct perfcontrol_cpu_counters * cpu_counters,__unused uint64_t * timeout_ticks)124 sched_perfcontrol_csw_default(
125 __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
126 __unused uint32_t flags, __unused struct perfcontrol_thread_data *offcore,
127 __unused struct perfcontrol_thread_data *oncore,
128 __unused struct perfcontrol_cpu_counters *cpu_counters, __unused uint64_t *timeout_ticks)
129 {
130 }
131
132 static void
sched_perfcontrol_state_update_default(__unused perfcontrol_event event,__unused uint32_t cpu_id,__unused uint64_t timestamp,__unused uint32_t flags,__unused struct perfcontrol_thread_data * thr_data,__unused uint64_t * timeout_ticks)133 sched_perfcontrol_state_update_default(
134 __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
135 __unused uint32_t flags, __unused struct perfcontrol_thread_data *thr_data,
136 __unused uint64_t *timeout_ticks)
137 {
138 }
139
140 static void
sched_perfcontrol_thread_group_blocked_default(__unused thread_group_data_t blocked_tg,__unused thread_group_data_t blocking_tg,__unused uint32_t flags,__unused perfcontrol_state_t blocked_thr_state)141 sched_perfcontrol_thread_group_blocked_default(
142 __unused thread_group_data_t blocked_tg, __unused thread_group_data_t blocking_tg,
143 __unused uint32_t flags, __unused perfcontrol_state_t blocked_thr_state)
144 {
145 }
146
147 static void
sched_perfcontrol_thread_group_unblocked_default(__unused thread_group_data_t unblocked_tg,__unused thread_group_data_t unblocking_tg,__unused uint32_t flags,__unused perfcontrol_state_t unblocked_thr_state)148 sched_perfcontrol_thread_group_unblocked_default(
149 __unused thread_group_data_t unblocked_tg, __unused thread_group_data_t unblocking_tg,
150 __unused uint32_t flags, __unused perfcontrol_state_t unblocked_thr_state)
151 {
152 }
153
154 static void
sched_perfcontrol_running_timer_expire_default(__unused uint64_t now,__unused uint32_t flags,__unused uint32_t cpu_id,__unused uint64_t * timeout_ticks)155 sched_perfcontrol_running_timer_expire_default(
156 __unused uint64_t now, __unused uint32_t flags, __unused uint32_t cpu_id, __unused uint64_t *timeout_ticks)
157 {
158 }
159
160 sched_perfcontrol_offcore_t sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
161 sched_perfcontrol_context_switch_t sched_perfcontrol_switch = sched_perfcontrol_switch_default;
162 sched_perfcontrol_oncore_t sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
163 sched_perfcontrol_thread_group_init_t sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
164 sched_perfcontrol_thread_group_deinit_t sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
165 sched_perfcontrol_thread_group_flags_update_t sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
166 sched_perfcontrol_max_runnable_latency_t sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
167 sched_perfcontrol_work_interval_notify_t sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
168 sched_perfcontrol_work_interval_ctl_t sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
169 sched_perfcontrol_deadline_passed_t sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
170 sched_perfcontrol_csw_t sched_perfcontrol_csw = sched_perfcontrol_csw_default;
171 sched_perfcontrol_state_update_t sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
172 sched_perfcontrol_thread_group_blocked_t sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
173 sched_perfcontrol_thread_group_unblocked_t sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
174 sched_perfcontrol_running_timer_expire_t sched_perfcontrol_running_timer_expire = sched_perfcontrol_running_timer_expire_default;
175 boolean_t sched_perfcontrol_thread_shared_rsrc_flags_enabled = false;
176
177 void
sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks,unsigned long size_of_state)178 sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks, unsigned long size_of_state)
179 {
180 assert(callbacks == NULL || callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_2);
181
182 if (size_of_state > sizeof(struct perfcontrol_state)) {
183 panic("%s: Invalid required state size %lu", __FUNCTION__, size_of_state);
184 }
185
186 if (callbacks) {
187 #if CONFIG_THREAD_GROUPS
188 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_3) {
189 if (callbacks->thread_group_init != NULL) {
190 sched_perfcontrol_thread_group_init = callbacks->thread_group_init;
191 } else {
192 sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
193 }
194 if (callbacks->thread_group_deinit != NULL) {
195 sched_perfcontrol_thread_group_deinit = callbacks->thread_group_deinit;
196 } else {
197 sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
198 }
199 // tell CLPC about existing thread groups
200 thread_group_resync(TRUE);
201 }
202
203 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_6) {
204 if (callbacks->thread_group_flags_update != NULL) {
205 sched_perfcontrol_thread_group_flags_update = callbacks->thread_group_flags_update;
206 } else {
207 sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
208 }
209 }
210
211 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_8) {
212 if (callbacks->thread_group_blocked != NULL) {
213 sched_perfcontrol_thread_group_blocked = callbacks->thread_group_blocked;
214 } else {
215 sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
216 }
217
218 if (callbacks->thread_group_unblocked != NULL) {
219 sched_perfcontrol_thread_group_unblocked = callbacks->thread_group_unblocked;
220 } else {
221 sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
222 }
223 }
224 #endif
225 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_9) {
226 sched_perfcontrol_thread_shared_rsrc_flags_enabled = true;
227 }
228
229 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_10) {
230 sched_perfcontrol_running_timer_expire = callbacks->running_timer_expire;
231 }
232
233 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_7) {
234 if (callbacks->work_interval_ctl != NULL) {
235 sched_perfcontrol_work_interval_ctl = callbacks->work_interval_ctl;
236 } else {
237 sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
238 }
239 }
240
241 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_5) {
242 if (callbacks->csw != NULL) {
243 sched_perfcontrol_csw = callbacks->csw;
244 } else {
245 sched_perfcontrol_csw = sched_perfcontrol_csw_default;
246 }
247
248 if (callbacks->state_update != NULL) {
249 sched_perfcontrol_state_update = callbacks->state_update;
250 } else {
251 sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
252 }
253 }
254
255 if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_4) {
256 if (callbacks->deadline_passed != NULL) {
257 sched_perfcontrol_deadline_passed = callbacks->deadline_passed;
258 } else {
259 sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
260 }
261 }
262
263 if (callbacks->offcore != NULL) {
264 sched_perfcontrol_offcore = callbacks->offcore;
265 } else {
266 sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
267 }
268
269 if (callbacks->context_switch != NULL) {
270 sched_perfcontrol_switch = callbacks->context_switch;
271 } else {
272 sched_perfcontrol_switch = sched_perfcontrol_switch_default;
273 }
274
275 if (callbacks->oncore != NULL) {
276 sched_perfcontrol_oncore = callbacks->oncore;
277 } else {
278 sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
279 }
280
281 if (callbacks->max_runnable_latency != NULL) {
282 sched_perfcontrol_max_runnable_latency = callbacks->max_runnable_latency;
283 } else {
284 sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
285 }
286
287 if (callbacks->work_interval_notify != NULL) {
288 sched_perfcontrol_work_interval_notify = callbacks->work_interval_notify;
289 } else {
290 sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
291 }
292 } else {
293 /* reset to defaults */
294 #if CONFIG_THREAD_GROUPS
295 thread_group_resync(FALSE);
296 #endif
297 sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
298 sched_perfcontrol_switch = sched_perfcontrol_switch_default;
299 sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
300 sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
301 sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
302 sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
303 sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
304 sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
305 sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
306 sched_perfcontrol_csw = sched_perfcontrol_csw_default;
307 sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
308 sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
309 sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
310 }
311 }
312
313
314 static void
machine_switch_populate_perfcontrol_thread_data(struct perfcontrol_thread_data * data,thread_t thread,uint64_t same_pri_latency)315 machine_switch_populate_perfcontrol_thread_data(struct perfcontrol_thread_data *data,
316 thread_t thread,
317 uint64_t same_pri_latency)
318 {
319 bzero(data, sizeof(struct perfcontrol_thread_data));
320 data->perfctl_class = thread_get_perfcontrol_class(thread);
321 data->energy_estimate_nj = 0;
322 data->thread_id = thread->thread_id;
323 #if CONFIG_THREAD_GROUPS
324 struct thread_group *tg = thread_group_get(thread);
325 data->thread_group_id = thread_group_get_id(tg);
326 data->thread_group_data = thread_group_get_machine_data(tg);
327 #endif
328 data->scheduling_latency_at_same_basepri = same_pri_latency;
329 data->perfctl_state = FIND_PERFCONTROL_STATE(thread);
330 }
331
332 static void
machine_switch_populate_perfcontrol_cpu_counters(struct perfcontrol_cpu_counters * cpu_counters)333 machine_switch_populate_perfcontrol_cpu_counters(struct perfcontrol_cpu_counters *cpu_counters)
334 {
335 #if CONFIG_CPU_COUNTERS
336 mt_perfcontrol(&cpu_counters->instructions, &cpu_counters->cycles);
337 #else /* CONFIG_CPU_COUNTERS */
338 cpu_counters->instructions = 0;
339 cpu_counters->cycles = 0;
340 #endif /* !CONFIG_CPU_COUNTERS */
341 }
342
343 int perfcontrol_callout_stats_enabled = 0;
344 static _Atomic uint64_t perfcontrol_callout_stats[PERFCONTROL_CALLOUT_MAX][PERFCONTROL_STAT_MAX];
345 static _Atomic uint64_t perfcontrol_callout_count[PERFCONTROL_CALLOUT_MAX];
346
347 #if CONFIG_CPU_COUNTERS
348 static inline
349 bool
perfcontrol_callout_counters_begin(uint64_t * counters)350 perfcontrol_callout_counters_begin(uint64_t *counters)
351 {
352 if (!perfcontrol_callout_stats_enabled) {
353 return false;
354 }
355 mt_fixed_counts(counters);
356 return true;
357 }
358
359 static inline
360 void
perfcontrol_callout_counters_end(uint64_t * start_counters,perfcontrol_callout_type_t type)361 perfcontrol_callout_counters_end(uint64_t *start_counters,
362 perfcontrol_callout_type_t type)
363 {
364 uint64_t end_counters[MT_CORE_NFIXED];
365 mt_fixed_counts(end_counters);
366 os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_CYCLES],
367 end_counters[MT_CORE_CYCLES] - start_counters[MT_CORE_CYCLES], relaxed);
368 os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_INSTRS],
369 end_counters[MT_CORE_INSTRS] - start_counters[MT_CORE_INSTRS], relaxed);
370 os_atomic_inc(&perfcontrol_callout_count[type], relaxed);
371 }
372 #endif /* CONFIG_CPU_COUNTERS */
373
374 uint64_t
perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,perfcontrol_callout_stat_t stat)375 perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,
376 perfcontrol_callout_stat_t stat)
377 {
378 if (!perfcontrol_callout_stats_enabled) {
379 return 0;
380 }
381 return os_atomic_load_wide(&perfcontrol_callout_stats[type][stat], relaxed) /
382 os_atomic_load_wide(&perfcontrol_callout_count[type], relaxed);
383 }
384
385 #if CONFIG_SCHED_EDGE
386
387 /*
388 * The Edge scheduler allows the performance controller to update properties about the
389 * threads as part of the callouts. These properties typically include shared cluster
390 * resource usage. This allows the scheduler to manage specific threads within the
391 * workload more optimally.
392 */
393 static void
sched_perfcontrol_thread_flags_update(thread_t thread,struct perfcontrol_thread_data * thread_data,shared_rsrc_policy_agent_t agent)394 sched_perfcontrol_thread_flags_update(thread_t thread,
395 struct perfcontrol_thread_data *thread_data,
396 shared_rsrc_policy_agent_t agent)
397 {
398 kern_return_t kr = KERN_SUCCESS;
399 if (thread_data->thread_flags_mask & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_RR) {
400 if (thread_data->thread_flags & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_RR) {
401 kr = thread_shared_rsrc_policy_set(thread, 0, CLUSTER_SHARED_RSRC_TYPE_RR, agent);
402 } else {
403 kr = thread_shared_rsrc_policy_clear(thread, CLUSTER_SHARED_RSRC_TYPE_RR, agent);
404 }
405 }
406 if (thread_data->thread_flags_mask & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_NATIVE_FIRST) {
407 if (thread_data->thread_flags & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_NATIVE_FIRST) {
408 kr = thread_shared_rsrc_policy_set(thread, 0, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST, agent);
409 } else {
410 kr = thread_shared_rsrc_policy_clear(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST, agent);
411 }
412 }
413 /*
414 * The thread_shared_rsrc_policy_* routines only fail if the performance controller is
415 * attempting to double set/clear a policy on the thread.
416 */
417 assert(kr == KERN_SUCCESS);
418 }
419
420 #endif /* CONFIG_SCHED_EDGE */
421
422 void
machine_switch_perfcontrol_context(perfcontrol_event event,uint64_t timestamp,uint32_t flags,uint64_t new_thread_same_pri_latency,thread_t old,thread_t new)423 machine_switch_perfcontrol_context(perfcontrol_event event,
424 uint64_t timestamp,
425 uint32_t flags,
426 uint64_t new_thread_same_pri_latency,
427 thread_t old,
428 thread_t new)
429 {
430
431 if (sched_perfcontrol_switch != sched_perfcontrol_switch_default) {
432 perfcontrol_state_t old_perfcontrol_state = FIND_PERFCONTROL_STATE(old);
433 perfcontrol_state_t new_perfcontrol_state = FIND_PERFCONTROL_STATE(new);
434 sched_perfcontrol_switch(old_perfcontrol_state, new_perfcontrol_state);
435 }
436
437 if (sched_perfcontrol_csw != sched_perfcontrol_csw_default) {
438 uint32_t cpu_id = (uint32_t)cpu_number();
439 struct perfcontrol_cpu_counters cpu_counters;
440 struct perfcontrol_thread_data offcore, oncore;
441 machine_switch_populate_perfcontrol_thread_data(&offcore, old, 0);
442 machine_switch_populate_perfcontrol_thread_data(&oncore, new,
443 new_thread_same_pri_latency);
444 machine_switch_populate_perfcontrol_cpu_counters(&cpu_counters);
445 uint64_t timeout_ticks = 0;
446
447 #if CONFIG_CPU_COUNTERS
448 uint64_t counters[MT_CORE_NFIXED];
449 bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
450 #endif /* CONFIG_CPU_COUNTERS */
451 sched_perfcontrol_csw(event, cpu_id, timestamp, flags,
452 &offcore, &oncore, &cpu_counters, &timeout_ticks);
453 #if CONFIG_CPU_COUNTERS
454 if (ctrs_enabled) {
455 perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_CONTEXT);
456 }
457 #endif /* CONFIG_CPU_COUNTERS */
458
459 recount_add_energy(old, get_threadtask(old),
460 offcore.energy_estimate_nj);
461
462 #if CONFIG_SCHED_EDGE
463 if (sched_perfcontrol_thread_shared_rsrc_flags_enabled) {
464 sched_perfcontrol_thread_flags_update(old, &offcore, SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
465 }
466 if (timeout_ticks != 0) {
467 cpu_set_perfcontrol_timer(timestamp, timeout_ticks);
468 }
469 #endif /* CONFIG_SCHED_EDGE */
470 }
471 }
472
473 void
machine_switch_perfcontrol_state_update(perfcontrol_event event,uint64_t timestamp,uint32_t flags,thread_t thread)474 machine_switch_perfcontrol_state_update(perfcontrol_event event,
475 uint64_t timestamp,
476 uint32_t flags,
477 thread_t thread)
478 {
479
480 if (sched_perfcontrol_state_update == sched_perfcontrol_state_update_default) {
481 return;
482 }
483 uint32_t cpu_id = (uint32_t)cpu_number();
484 struct perfcontrol_thread_data data;
485 machine_switch_populate_perfcontrol_thread_data(&data, thread, 0);
486 uint64_t timeout_ticks = 0;
487
488 #if CONFIG_CPU_COUNTERS
489 uint64_t counters[MT_CORE_NFIXED];
490 bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
491 #endif /* CONFIG_CPU_COUNTERS */
492 sched_perfcontrol_state_update(event, cpu_id, timestamp, flags,
493 &data, &timeout_ticks);
494 #if CONFIG_CPU_COUNTERS
495 if (ctrs_enabled) {
496 perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_STATE_UPDATE);
497 }
498 #endif /* CONFIG_CPU_COUNTERS */
499
500 #if CONFIG_PERVASIVE_ENERGY
501 recount_add_energy(thread, get_threadtask(thread), data.energy_estimate_nj);
502 #endif /* CONFIG_PERVASIVE_ENERGY */
503
504 #if CONFIG_SCHED_EDGE
505 if (sched_perfcontrol_thread_shared_rsrc_flags_enabled && (event == QUANTUM_EXPIRY)) {
506 sched_perfcontrol_thread_flags_update(thread, &data, SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM);
507 } else {
508 assert(data.thread_flags_mask == 0);
509 }
510 if (timeout_ticks != 0) {
511 cpu_set_perfcontrol_timer(timestamp, timeout_ticks);
512 }
513 #endif /* CONFIG_SCHED_EDGE */
514 }
515
516 void
machine_thread_going_on_core(thread_t new_thread,thread_urgency_t urgency,uint64_t sched_latency,uint64_t same_pri_latency,uint64_t timestamp)517 machine_thread_going_on_core(thread_t new_thread,
518 thread_urgency_t urgency,
519 uint64_t sched_latency,
520 uint64_t same_pri_latency,
521 uint64_t timestamp)
522 {
523 if (sched_perfcontrol_oncore == sched_perfcontrol_oncore_default) {
524 return;
525 }
526 struct going_on_core on_core;
527 perfcontrol_state_t state = FIND_PERFCONTROL_STATE(new_thread);
528
529 on_core.thread_id = new_thread->thread_id;
530 on_core.energy_estimate_nj = 0;
531 on_core.qos_class = (uint16_t)proc_get_effective_thread_policy(new_thread, TASK_POLICY_QOS);
532 on_core.urgency = (uint16_t)urgency;
533 on_core.is_32_bit = thread_is_64bit_data(new_thread) ? FALSE : TRUE;
534 on_core.is_kernel_thread = get_threadtask(new_thread) == kernel_task;
535 #if CONFIG_THREAD_GROUPS
536 struct thread_group *tg = thread_group_get(new_thread);
537 on_core.thread_group_id = thread_group_get_id(tg);
538 on_core.thread_group_data = thread_group_get_machine_data(tg);
539 #endif
540 on_core.scheduling_latency = sched_latency;
541 on_core.start_time = timestamp;
542 on_core.scheduling_latency_at_same_basepri = same_pri_latency;
543
544 #if CONFIG_CPU_COUNTERS
545 uint64_t counters[MT_CORE_NFIXED];
546 bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
547 #endif /* CONFIG_CPU_COUNTERS */
548 sched_perfcontrol_oncore(state, &on_core);
549 #if CONFIG_CPU_COUNTERS
550 if (ctrs_enabled) {
551 perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_ON_CORE);
552 }
553 #endif /* CONFIG_CPU_COUNTERS */
554 }
555
556 void
machine_thread_going_off_core(thread_t old_thread,boolean_t thread_terminating,uint64_t last_dispatch,__unused boolean_t thread_runnable)557 machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
558 uint64_t last_dispatch, __unused boolean_t thread_runnable)
559 {
560 if (sched_perfcontrol_offcore == sched_perfcontrol_offcore_default) {
561 return;
562 }
563 struct going_off_core off_core;
564 perfcontrol_state_t state = FIND_PERFCONTROL_STATE(old_thread);
565
566 off_core.thread_id = old_thread->thread_id;
567 off_core.energy_estimate_nj = 0;
568 off_core.end_time = last_dispatch;
569 #if CONFIG_THREAD_GROUPS
570 struct thread_group *tg = thread_group_get(old_thread);
571 off_core.thread_group_id = thread_group_get_id(tg);
572 off_core.thread_group_data = thread_group_get_machine_data(tg);
573 #endif
574
575 #if CONFIG_CPU_COUNTERS
576 uint64_t counters[MT_CORE_NFIXED];
577 bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
578 #endif /* CONFIG_CPU_COUNTERS */
579 sched_perfcontrol_offcore(state, &off_core, thread_terminating);
580 #if CONFIG_CPU_COUNTERS
581 if (ctrs_enabled) {
582 perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_OFF_CORE);
583 }
584 #endif /* CONFIG_CPU_COUNTERS */
585 }
586
587 #if CONFIG_THREAD_GROUPS
588 void
machine_thread_group_init(struct thread_group * tg)589 machine_thread_group_init(struct thread_group *tg)
590 {
591 if (sched_perfcontrol_thread_group_init == sched_perfcontrol_thread_group_default) {
592 return;
593 }
594 struct thread_group_data data;
595 data.thread_group_id = thread_group_get_id(tg);
596 data.thread_group_data = thread_group_get_machine_data(tg);
597 data.thread_group_size = thread_group_machine_data_size();
598 data.thread_group_flags = thread_group_get_flags(tg);
599 sched_perfcontrol_thread_group_init(&data);
600 }
601
602 void
machine_thread_group_deinit(struct thread_group * tg)603 machine_thread_group_deinit(struct thread_group *tg)
604 {
605 if (sched_perfcontrol_thread_group_deinit == sched_perfcontrol_thread_group_default) {
606 return;
607 }
608 struct thread_group_data data;
609 data.thread_group_id = thread_group_get_id(tg);
610 data.thread_group_data = thread_group_get_machine_data(tg);
611 data.thread_group_size = thread_group_machine_data_size();
612 data.thread_group_flags = thread_group_get_flags(tg);
613 sched_perfcontrol_thread_group_deinit(&data);
614 }
615
616 void
machine_thread_group_flags_update(struct thread_group * tg,uint32_t flags)617 machine_thread_group_flags_update(struct thread_group *tg, uint32_t flags)
618 {
619 if (sched_perfcontrol_thread_group_flags_update == sched_perfcontrol_thread_group_default) {
620 return;
621 }
622 struct thread_group_data data;
623 data.thread_group_id = thread_group_get_id(tg);
624 data.thread_group_data = thread_group_get_machine_data(tg);
625 data.thread_group_size = thread_group_machine_data_size();
626 data.thread_group_flags = flags;
627 sched_perfcontrol_thread_group_flags_update(&data);
628 }
629
630 void
machine_thread_group_blocked(struct thread_group * blocked_tg,struct thread_group * blocking_tg,uint32_t flags,thread_t blocked_thread)631 machine_thread_group_blocked(struct thread_group *blocked_tg,
632 struct thread_group *blocking_tg,
633 uint32_t flags,
634 thread_t blocked_thread)
635 {
636 if (sched_perfcontrol_thread_group_blocked == sched_perfcontrol_thread_group_blocked_default) {
637 return;
638 }
639
640 spl_t s = splsched();
641
642 perfcontrol_state_t state = FIND_PERFCONTROL_STATE(blocked_thread);
643 struct thread_group_data blocked_data;
644 assert(blocked_tg != NULL);
645
646 blocked_data.thread_group_id = thread_group_get_id(blocked_tg);
647 blocked_data.thread_group_data = thread_group_get_machine_data(blocked_tg);
648 blocked_data.thread_group_size = thread_group_machine_data_size();
649
650 if (blocking_tg == NULL) {
651 /*
652 * For special cases such as the render server, the blocking TG is a
653 * well known TG. Only in that case, the blocking_tg should be NULL.
654 */
655 assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
656 sched_perfcontrol_thread_group_blocked(&blocked_data, NULL, flags, state);
657 } else {
658 struct thread_group_data blocking_data;
659 blocking_data.thread_group_id = thread_group_get_id(blocking_tg);
660 blocking_data.thread_group_data = thread_group_get_machine_data(blocking_tg);
661 blocking_data.thread_group_size = thread_group_machine_data_size();
662 sched_perfcontrol_thread_group_blocked(&blocked_data, &blocking_data, flags, state);
663 }
664 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_START,
665 thread_tid(blocked_thread), thread_group_get_id(blocked_tg),
666 blocking_tg ? thread_group_get_id(blocking_tg) : THREAD_GROUP_INVALID,
667 flags);
668
669 splx(s);
670 }
671
672 void
machine_thread_group_unblocked(struct thread_group * unblocked_tg,struct thread_group * unblocking_tg,uint32_t flags,thread_t unblocked_thread)673 machine_thread_group_unblocked(struct thread_group *unblocked_tg,
674 struct thread_group *unblocking_tg,
675 uint32_t flags,
676 thread_t unblocked_thread)
677 {
678 if (sched_perfcontrol_thread_group_unblocked == sched_perfcontrol_thread_group_unblocked_default) {
679 return;
680 }
681
682 spl_t s = splsched();
683
684 perfcontrol_state_t state = FIND_PERFCONTROL_STATE(unblocked_thread);
685 struct thread_group_data unblocked_data;
686 assert(unblocked_tg != NULL);
687
688 unblocked_data.thread_group_id = thread_group_get_id(unblocked_tg);
689 unblocked_data.thread_group_data = thread_group_get_machine_data(unblocked_tg);
690 unblocked_data.thread_group_size = thread_group_machine_data_size();
691
692 if (unblocking_tg == NULL) {
693 /*
694 * For special cases such as the render server, the unblocking TG is a
695 * well known TG. Only in that case, the unblocking_tg should be NULL.
696 */
697 assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
698 sched_perfcontrol_thread_group_unblocked(&unblocked_data, NULL, flags, state);
699 } else {
700 struct thread_group_data unblocking_data;
701 unblocking_data.thread_group_id = thread_group_get_id(unblocking_tg);
702 unblocking_data.thread_group_data = thread_group_get_machine_data(unblocking_tg);
703 unblocking_data.thread_group_size = thread_group_machine_data_size();
704 sched_perfcontrol_thread_group_unblocked(&unblocked_data, &unblocking_data, flags, state);
705 }
706 KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_END,
707 thread_tid(unblocked_thread), thread_group_get_id(unblocked_tg),
708 unblocking_tg ? thread_group_get_id(unblocking_tg) : THREAD_GROUP_INVALID,
709 flags);
710
711 splx(s);
712 }
713
714 #endif /* CONFIG_THREAD_GROUPS */
715
716 void
machine_perfcontrol_running_timer_expire(uint64_t now,uint32_t flags,int cpu_id,uint64_t * timeout_ticks)717 machine_perfcontrol_running_timer_expire(uint64_t now,
718 uint32_t flags,
719 int cpu_id,
720 uint64_t *timeout_ticks)
721 {
722 if (sched_perfcontrol_running_timer_expire != sched_perfcontrol_running_timer_expire_default) {
723 sched_perfcontrol_running_timer_expire(now, flags, cpu_id, timeout_ticks);
724 }
725 }
726
727 void
machine_max_runnable_latency(uint64_t bg_max_latency,uint64_t default_max_latency,uint64_t realtime_max_latency)728 machine_max_runnable_latency(uint64_t bg_max_latency,
729 uint64_t default_max_latency,
730 uint64_t realtime_max_latency)
731 {
732 if (sched_perfcontrol_max_runnable_latency == sched_perfcontrol_max_runnable_latency_default) {
733 return;
734 }
735 struct perfcontrol_max_runnable_latency latencies = {
736 .max_scheduling_latencies = {
737 [THREAD_URGENCY_NONE] = 0,
738 [THREAD_URGENCY_BACKGROUND] = bg_max_latency,
739 [THREAD_URGENCY_NORMAL] = default_max_latency,
740 [THREAD_URGENCY_REAL_TIME] = realtime_max_latency
741 }
742 };
743
744 sched_perfcontrol_max_runnable_latency(&latencies);
745 }
746
747 void
machine_work_interval_notify(thread_t thread,struct kern_work_interval_args * kwi_args)748 machine_work_interval_notify(thread_t thread,
749 struct kern_work_interval_args* kwi_args)
750 {
751 if (sched_perfcontrol_work_interval_notify == sched_perfcontrol_work_interval_notify_default) {
752 return;
753 }
754 perfcontrol_state_t state = FIND_PERFCONTROL_STATE(thread);
755 struct perfcontrol_work_interval work_interval = {
756 .thread_id = thread->thread_id,
757 .qos_class = (uint16_t)proc_get_effective_thread_policy(thread, TASK_POLICY_QOS),
758 .urgency = kwi_args->urgency,
759 .flags = kwi_args->notify_flags,
760 .work_interval_id = kwi_args->work_interval_id,
761 .start = kwi_args->start,
762 .finish = kwi_args->finish,
763 .deadline = kwi_args->deadline,
764 .next_start = kwi_args->next_start,
765 .create_flags = kwi_args->create_flags,
766 };
767 #if CONFIG_THREAD_GROUPS
768 struct thread_group *tg;
769 tg = thread_group_get(thread);
770 work_interval.thread_group_id = thread_group_get_id(tg);
771 work_interval.thread_group_data = thread_group_get_machine_data(tg);
772 #endif
773 sched_perfcontrol_work_interval_notify(state, &work_interval);
774 }
775
776
777 void
machine_perfcontrol_deadline_passed(uint64_t deadline)778 machine_perfcontrol_deadline_passed(uint64_t deadline)
779 {
780 if (sched_perfcontrol_deadline_passed != sched_perfcontrol_deadline_passed_default) {
781 sched_perfcontrol_deadline_passed(deadline);
782 }
783 }
784
785 /*
786 * Get a character representing the current thread's type of CPU core.
787 */
788 char
ml_get_current_core_type(void)789 ml_get_current_core_type(void)
790 {
791 const thread_t thread = current_thread();
792
793 #if __AMP__
794 processor_t processor = thread->last_processor;
795 if (!processor) {
796 return '!';
797 }
798 switch (processor->processor_set->pset_cluster_type) {
799 case PSET_AMP_P:
800 return 'P';
801 case PSET_AMP_E:
802 return 'E';
803 default:
804 return '?';
805 }
806 #else // __AMP__
807 #pragma unused(thread)
808 return '-';
809 #endif // !__AMP__
810 }
811
812 #if SCHED_HYGIENE_DEBUG
813
814 __options_decl(int_mask_hygiene_flags_t, uint8_t, {
815 INT_MASK_BASE = 0x00,
816 INT_MASK_FROM_HANDLER = 0x01,
817 INT_MASK_IS_STACKSHOT = 0x02,
818 });
819
820 /*
821 * ml_spin_debug_reset()
822 * Reset the timestamp on a thread that has been unscheduled
823 * to avoid false alarms. Alarm will go off if interrupts are held
824 * disabled for too long, starting from now.
825 */
826 void
ml_spin_debug_reset(thread_t thread)827 ml_spin_debug_reset(thread_t thread)
828 {
829 const timeout_flags_t flags = ML_TIMEOUT_TIMEBASE_FLAGS | ML_TIMEOUT_PMC_FLAGS;
830
831 kern_timeout_restart(&thread->machine.int_timeout, flags);
832 }
833
834 /*
835 * ml_spin_debug_clear()
836 * Clear the timestamp and cycle/instruction counts on a thread that
837 * has been unscheduled to avoid false alarms
838 */
839 void
ml_spin_debug_clear(thread_t thread)840 ml_spin_debug_clear(thread_t thread)
841 {
842 kern_timeout_override(&thread->machine.int_timeout);
843 }
844
845 /*
846 * ml_spin_debug_clear_self()
847 * Clear the timestamp on the current thread to prevent
848 * false alarms
849 */
850 void
ml_spin_debug_clear_self(void)851 ml_spin_debug_clear_self(void)
852 {
853 ml_spin_debug_clear(current_thread());
854 }
855
856 void
_ml_interrupt_masked_debug_start(uintptr_t handler_addr,int type)857 _ml_interrupt_masked_debug_start(uintptr_t handler_addr, int type)
858 {
859 const timeout_flags_t flags = ML_TIMEOUT_TIMEBASE_FLAGS | ML_TIMEOUT_PMC_FLAGS;
860 const thread_t thread = current_thread();
861
862 thread->machine.int_type = type;
863 thread->machine.int_handler_addr = (uintptr_t)VM_KERNEL_STRIP_UPTR(handler_addr);
864 thread->machine.int_vector = (uintptr_t)NULL;
865 kern_timeout_start(&thread->machine.int_timeout, flags);
866 }
867
868 void
_ml_interrupt_masked_debug_end(void)869 _ml_interrupt_masked_debug_end(void)
870 {
871 const timeout_flags_t flags = ML_TIMEOUT_TIMEBASE_FLAGS;
872 const thread_t thread = current_thread();
873
874 kern_timeout_end(&thread->machine.int_timeout, flags);
875 if (os_atomic_load(&interrupt_masked_timeout, relaxed) > 0) {
876 ml_handle_interrupt_handler_duration(thread);
877 }
878 os_compiler_barrier();
879 thread->machine.int_type = 0;
880 thread->machine.int_handler_addr = (uintptr_t)NULL;
881 thread->machine.int_vector = (uintptr_t)NULL;
882 }
883
884 #ifndef KASAN
885
886 #define PREFIX_STRING_SIZE 256
887
888 static void
__ml_trigger_interrupts_disabled_handle(thread_t thread,uint64_t timeout,int_mask_hygiene_flags_t int_flags)889 __ml_trigger_interrupts_disabled_handle(thread_t thread, uint64_t timeout, int_mask_hygiene_flags_t int_flags)
890 {
891 #if __AMP__
892 if (int_flags == INT_MASK_IS_STACKSHOT && interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
893 /*
894 * If there are no recommended performance cores, we double the timeout to compensate
895 * for the difference in time it takes Stackshot to run on efficiency cores, and then
896 * recheck if we still exceeded the adjusted timeout.
897 */
898 int cpu;
899 int max_cpu;
900
901 max_cpu = ml_get_max_cpu_number();
902 for (cpu = 0; cpu <= max_cpu; cpu++) {
903 processor_t processor = cpu_to_processor(cpu);
904 if (processor->is_recommended &&
905 processor->processor_set->pset_cluster_type == PSET_AMP_P) {
906 break;
907 }
908 }
909 if (cpu > max_cpu) {
910 uint64_t time_elapsed = kern_timeout_gross_duration(&thread->machine.int_timeout);
911 if (time_elapsed < timeout * 2) {
912 return;
913 }
914 }
915 }
916 #endif /* __AMP__ */
917
918 if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
919 char prefix_string[PREFIX_STRING_SIZE] = { '\0' };
920
921 if (int_flags & INT_MASK_FROM_HANDLER) {
922 snprintf(prefix_string, PREFIX_STRING_SIZE,
923 "Processing of an interrupt (type = %u, handler address = %p, vector = %p) "
924 "timed out:", thread->machine.int_type,
925 (void *)thread->machine.int_handler_addr,
926 (void *)thread->machine.int_vector);
927 } else if (int_flags & INT_MASK_IS_STACKSHOT) {
928 snprintf(prefix_string, PREFIX_STRING_SIZE,
929 "Stackshot duration timed out:");
930 } else {
931 snprintf(prefix_string, PREFIX_STRING_SIZE,
932 "Interrupts held disabled timed out:");
933 }
934 kern_timeout_try_panic(KERN_TIMEOUT_INTERRUPT, thread->machine.int_type,
935 &thread->machine.int_timeout, prefix_string, timeout);
936 } else if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_TRACE) {
937 uint64_t time_elapsed = kern_timeout_gross_duration(&thread->machine.int_timeout);
938 uint64_t cycles_elapsed;
939 uint64_t instrs_elapsed;
940
941 kern_timeout_cycles_instrs(&thread->machine.int_timeout,
942 &cycles_elapsed, &instrs_elapsed);
943
944 if (int_flags != INT_MASK_BASE) {
945 static const uint32_t interrupt_handled_dbgid =
946 MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_HANDLED_EXPIRED);
947 DTRACE_SCHED3(interrupt_handled_dbgid, uint64_t, time_elapsed,
948 uint64_t, cycles_elapsed, uint64_t, instrs_elapsed);
949 KDBG(interrupt_handled_dbgid, time_elapsed,
950 cycles_elapsed, instrs_elapsed);
951 } else {
952 static const uint32_t interrupt_masked_dbgid =
953 MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_MASKED_EXPIRED);
954 DTRACE_SCHED3(interrupt_masked_dbgid, uint64_t, time_elapsed,
955 uint64_t, cycles_elapsed, uint64_t, instrs_elapsed);
956 KDBG(interrupt_masked_dbgid, time_elapsed,
957 cycles_elapsed, instrs_elapsed);
958 }
959 }
960 }
961 #endif // !defined(KASAN)
962
963 static inline void
__ml_handle_interrupts_disabled_duration(thread_t thread,uint64_t timeout,int_mask_hygiene_flags_t int_flags)964 __ml_handle_interrupts_disabled_duration(thread_t thread, uint64_t timeout, int_mask_hygiene_flags_t int_flags)
965 {
966 const timeout_flags_t flags = ML_TIMEOUT_TIMEBASE_FLAGS;
967
968 if (timeout == 0) {
969 return; // 0 means timeout disabled.
970 }
971
972 kern_timeout_end(&thread->machine.int_timeout, flags);
973
974 if (__improbable(interrupt_masked_debug_mode &&
975 kern_timeout_gross_duration(&thread->machine.int_timeout)
976 >= timeout * debug_cpu_performance_degradation_factor)) {
977 /*
978 * Disable the actual panic for KASAN due to the overhead of KASAN itself, leave the rest of the
979 * mechanism enabled so that KASAN can catch any bugs in the mechanism itself.
980 */
981 #ifndef KASAN
982 __ml_trigger_interrupts_disabled_handle(thread, timeout, int_flags);
983 #endif
984 }
985
986 if (int_flags != INT_MASK_BASE) {
987 uint64_t const duration = kern_timeout_gross_duration(&thread->machine.int_timeout);
988 /*
989 * No need for an atomic add, the only thread modifying
990 * this is ourselves. Other threads querying will just see
991 * either the old or the new value. (This will also just
992 * resolve to regular loads and stores on relevant
993 * platforms.)
994 */
995 uint64_t const old_duration = os_atomic_load(&thread->machine.int_time_mt, relaxed);
996 os_atomic_store(&thread->machine.int_time_mt, old_duration + duration, relaxed);
997 }
998
999 /*
1000 * There are some circumstances where interrupts will be disabled
1001 * outside of the KPIs and then re-enabled, so we don't want to reuse
1002 * an old start time in that case (which will blow up with timeout
1003 * exceeded), so we just unconditionally reset the start time here.
1004 */
1005 kern_timeout_override(&thread->machine.int_timeout);
1006 }
1007
1008 void
ml_handle_interrupts_disabled_duration(thread_t thread)1009 ml_handle_interrupts_disabled_duration(thread_t thread)
1010 {
1011 __ml_handle_interrupts_disabled_duration(thread, os_atomic_load(&interrupt_masked_timeout, relaxed), INT_MASK_BASE);
1012 }
1013
1014 void
ml_handle_stackshot_interrupt_disabled_duration(thread_t thread)1015 ml_handle_stackshot_interrupt_disabled_duration(thread_t thread)
1016 {
1017 /* Use MAX() to let the user bump the timeout further if needed */
1018 uint64_t stackshot_timeout = os_atomic_load(&stackshot_interrupt_masked_timeout, relaxed);
1019 uint64_t normal_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
1020 uint64_t timeout = MAX(stackshot_timeout, normal_timeout);
1021 __ml_handle_interrupts_disabled_duration(thread, timeout, INT_MASK_IS_STACKSHOT);
1022 }
1023
1024 void
ml_handle_interrupt_handler_duration(thread_t thread)1025 ml_handle_interrupt_handler_duration(thread_t thread)
1026 {
1027 __ml_handle_interrupts_disabled_duration(thread, os_atomic_load(&interrupt_masked_timeout, relaxed), INT_MASK_FROM_HANDLER);
1028 }
1029
1030 void
ml_irq_debug_start(uintptr_t handler,uintptr_t vector)1031 ml_irq_debug_start(uintptr_t handler, uintptr_t vector)
1032 {
1033 ml_interrupt_masked_debug_start((void *)handler, DBG_INTR_TYPE_OTHER);
1034 current_thread()->machine.int_vector = (uintptr_t)VM_KERNEL_STRIP_PTR(vector);
1035 }
1036
/*
 * End the IRQ handler timing started by ml_irq_debug_start() by invoking
 * ml_interrupt_masked_debug_end().
 *
 * Declared with an explicit (void) parameter list so that the definition is a
 * real prototype, like the other no-argument routines in this file.
 */
void
ml_irq_debug_end(void)
{
	ml_interrupt_masked_debug_end();
}
1042
1043 /*
1044 * Abandon a potential timeout when handling an interrupt. It is important to
1045 * continue to keep track of the interrupt time so the time-stamp can't be
1046 * reset. (Interrupt time is subtracted from preemption time to maintain
1047 * accurate preemption time measurement).
1048 * When `inthandler_abandon` is true, a timeout will be ignored when the
1049 * interrupt handler finishes.
1050 */
1051 void
ml_irq_debug_abandon(void)1052 ml_irq_debug_abandon(void)
1053 {
1054 assert(!ml_get_interrupts_enabled());
1055
1056 thread_t thread = current_thread();
1057 kern_timeout_override(&thread->machine.int_timeout);
1058 }
1059
1060 static void
ml_interrupt_masked_debug_timestamp(thread_t thread)1061 ml_interrupt_masked_debug_timestamp(thread_t thread)
1062 {
1063 const timeout_flags_t flags = ML_TIMEOUT_TIMEBASE_FLAGS | ML_TIMEOUT_PMC_FLAGS;
1064
1065 kern_timeout_start(&thread->machine.int_timeout, flags);
1066 }
1067 #endif /* SCHED_HYGIENE_DEBUG */
1068
1069 __mockable boolean_t
ml_set_interrupts_enabled_with_debug(boolean_t enable,boolean_t __unused debug)1070 ml_set_interrupts_enabled_with_debug(boolean_t enable, boolean_t __unused debug)
1071 {
1072 thread_t thread;
1073 uint64_t state;
1074
1075 thread = current_thread();
1076
1077 state = __builtin_arm_rsr("DAIF");
1078
1079 if (__improbable(!(state & DAIF_DEBUGF))) {
1080 panic("%s: debug exceptions enabled in kernel mode", __func__);
1081 }
1082 if (enable && (state & DAIF_STANDARD_DISABLE)) {
1083 assert3u(state & DAIF_STANDARD_DISABLE, ==, DAIF_STANDARD_DISABLE);
1084 assert(getCpuDatap()->cpu_int_state == NULL); // Make sure we're not enabling interrupts from primary interrupt context
1085 #if SCHED_HYGIENE_DEBUG
1086 if (__probable(debug && static_if(sched_debug_interrupt_disable))) {
1087 // Interrupts are currently masked, we will enable them (after finishing this check)
1088 if (stackshot_active()) {
1089 ml_handle_stackshot_interrupt_disabled_duration(thread);
1090 } else {
1091 ml_handle_interrupts_disabled_duration(thread);
1092 }
1093 }
1094 #endif // SCHED_HYGIENE_DEBUG
1095 if (get_preemption_level() == 0) {
1096 while (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
1097 #if __ARM_USER_PROTECT__
1098 uintptr_t up = arm_user_protect_begin(thread);
1099 #endif
1100 ast_taken_kernel();
1101 #if __ARM_USER_PROTECT__
1102 arm_user_protect_end(thread, up, FALSE);
1103 #endif
1104 }
1105 }
1106 __builtin_arm_wsr("DAIFClr", DAIFSC_STANDARD_DISABLE);
1107 } else if (!enable && ((state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE)) {
1108 assert3u(state & DAIF_STANDARD_DISABLE, ==, 0);
1109 __builtin_arm_wsr("DAIFSet", DAIFSC_STANDARD_DISABLE);
1110
1111 #if SCHED_HYGIENE_DEBUG
1112 if (__probable(debug && static_if(sched_debug_interrupt_disable))) {
1113 // Interrupts were enabled, we just masked them
1114 ml_interrupt_masked_debug_timestamp(thread);
1115 }
1116 #endif
1117 }
1118 return (state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE;
1119 }
1120
1121 boolean_t
ml_set_interrupts_enabled(boolean_t enable)1122 ml_set_interrupts_enabled(boolean_t enable)
1123 {
1124 return ml_set_interrupts_enabled_with_debug(enable, true);
1125 }
1126
1127 boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)1128 ml_early_set_interrupts_enabled(boolean_t enable)
1129 {
1130 return ml_set_interrupts_enabled(enable);
1131 }
1132
1133 /*
1134 * Interrupt enable function exported for AppleCLPC without
1135 * measurements enabled.
1136 *
1137 * Only for AppleCLPC!
1138 */
1139 boolean_t
sched_perfcontrol_ml_set_interrupts_without_measurement(boolean_t enable)1140 sched_perfcontrol_ml_set_interrupts_without_measurement(boolean_t enable)
1141 {
1142 return ml_set_interrupts_enabled_with_debug(enable, false);
1143 }
1144
1145 /*
1146 * Routine: ml_at_interrupt_context
1147 * Function: Check if running at interrupt context
1148 */
1149 boolean_t
ml_at_interrupt_context(void)1150 ml_at_interrupt_context(void)
1151 {
1152 /* Do not use a stack-based check here, as the top-level exception handler
1153 * is free to use some other stack besides the per-CPU interrupt stack.
1154 * Interrupts should always be disabled if we're at interrupt context.
1155 * Check that first, as we may be in a preemptible non-interrupt context, in
1156 * which case we could be migrated to a different CPU between obtaining
1157 * the per-cpu data pointer and loading cpu_int_state. We then might end
1158 * up checking the interrupt state of a different CPU, resulting in a false
1159 * positive. But if interrupts are disabled, we also know we cannot be
1160 * preempted. */
1161 return !ml_get_interrupts_enabled() && (getCpuDatap()->cpu_int_state != NULL);
1162 }
1163
1164 /*
1165 * This answers the question
1166 * "after returning from this interrupt handler with the AST_URGENT bit set,
1167 * will I end up in ast_taken_user or ast_taken_kernel?"
1168 *
1169 * If it's called in non-interrupt context (e.g. regular syscall), it should
1170 * return false.
1171 *
1172 * Must be called with interrupts disabled.
1173 */
1174 bool
ml_did_interrupt_userspace(void)1175 ml_did_interrupt_userspace(void)
1176 {
1177 assert(ml_get_interrupts_enabled() == false);
1178
1179 struct arm_saved_state *state = getCpuDatap()->cpu_int_state;
1180
1181 return state && PSR64_IS_USER(get_saved_state_cpsr(state));
1182 }
1183
1184
1185 vm_offset_t
ml_stack_remaining(void)1186 ml_stack_remaining(void)
1187 {
1188 uintptr_t local = (uintptr_t) &local;
1189 vm_offset_t intstack_top_ptr;
1190
1191 /* Since this is a stack-based check, we don't need to worry about
1192 * preemption as we do in ml_at_interrupt_context(). If we are preemptible,
1193 * then the sp should never be within any CPU's interrupt stack unless
1194 * something has gone horribly wrong. */
1195 intstack_top_ptr = getCpuDatap()->intstack_top;
1196 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1197 return local - (getCpuDatap()->intstack_top - INTSTACK_SIZE);
1198 } else {
1199 return local - current_thread()->kernel_stack;
1200 }
1201 }
1202
1203 static boolean_t ml_quiescing = FALSE;
1204
1205 void
ml_set_is_quiescing(boolean_t quiescing)1206 ml_set_is_quiescing(boolean_t quiescing)
1207 {
1208 assert(ml_quiescing != quiescing);
1209 ml_quiescing = quiescing;
1210 os_atomic_thread_fence(release);
1211 }
1212
1213 boolean_t
ml_is_quiescing(void)1214 ml_is_quiescing(void)
1215 {
1216 os_atomic_thread_fence(acquire);
1217 return ml_quiescing;
1218 }
1219
1220 uint64_t
ml_get_booter_memory_size(void)1221 ml_get_booter_memory_size(void)
1222 {
1223 #if CONFIG_SPTM
1224 extern uint64_t memSize;
1225 #endif /* CONFIG_SPTM */
1226 uint64_t size;
1227 uint64_t roundsize = 512 * 1024 * 1024ULL;
1228 size = BootArgs->memSizeActual;
1229 if (!size) {
1230 #if CONFIG_SPTM
1231 /*
1232 * SPTM systems cache [memSize] in a CTRR-protected variable rather
1233 * than relying on [BootArgs]. This is to enable the possibility
1234 * for XNU to modify it before machine lockdown, which happens in
1235 * KASAN kernels. If we did not do this, XNU would fault on the first
1236 * attempt to overwrite [BootArgs->memSize].
1237 */
1238 size = memSize;
1239 #else
1240 size = BootArgs->memSize;
1241 #endif /* CONFIG_SPTM */
1242 if (size < (2 * roundsize)) {
1243 roundsize >>= 1;
1244 }
1245 size = (size + roundsize - 1) & ~(roundsize - 1);
1246 }
1247
1248 #if CONFIG_SPTM
1249 size -= memSize;
1250 #else
1251 size -= BootArgs->memSize;
1252 #endif /* CONFIG_SPTM */
1253
1254 return size;
1255 }
1256
1257 uint64_t
ml_get_abstime_offset(void)1258 ml_get_abstime_offset(void)
1259 {
1260 return rtclock_base_abstime;
1261 }
1262
1263 uint64_t
ml_get_conttime_offset(void)1264 ml_get_conttime_offset(void)
1265 {
1266 #if HIBERNATION && HAS_CONTINUOUS_HWCLOCK
1267 return hwclock_conttime_offset;
1268 #elif HAS_CONTINUOUS_HWCLOCK
1269 return 0;
1270 #else
1271 return rtclock_base_abstime + mach_absolutetime_asleep;
1272 #endif
1273 }
1274
/*
 * Return the time elapsed since reset.
 *
 * With HAS_CONTINUOUS_HWCLOCK this is mach_continuous_time() minus
 * wake_conttime, or UINT64_MAX while wake_conttime is still UINT64_MAX.
 * Otherwise the raw hardware clock value is returned.
 */
uint64_t
ml_get_time_since_reset(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	if (wake_conttime != UINT64_MAX) {
		return mach_continuous_time() - wake_conttime;
	}
	return UINT64_MAX;
#else
	/* The timebase resets across S2R, so just return the raw value. */
	return ml_get_hwclock();
#endif
}
1289
1290 void
ml_set_reset_time(__unused uint64_t wake_time)1291 ml_set_reset_time(__unused uint64_t wake_time)
1292 {
1293 #if HAS_CONTINUOUS_HWCLOCK
1294 wake_conttime = wake_time;
1295 #endif
1296 }
1297
/*
 * Return the wake time on the continuous-time scale.
 */
uint64_t
ml_get_conttime_wake_time(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	/*
	 * For now, the timebase value from cpu_timebase_init is reconstituted
	 * (wake_abstime minus the absolute-time offset) and used as the wake time.
	 */
	const uint64_t abstime_offset = ml_get_abstime_offset();
	return wake_abstime - abstime_offset;
#else /* HAS_CONTINUOUS_HWCLOCK */
	/* The wake time is simply our continuous time offset. */
	return ml_get_conttime_offset();
#endif /* HAS_CONTINUOUS_HWCLOCK */
}
1312
1313 /*
1314 * ml_snoop_thread_is_on_core(thread_t thread)
1315 * Check if the given thread is currently on core. This function does not take
1316 * locks, disable preemption, or otherwise guarantee synchronization. The
1317 * result should be considered advisory.
1318 */
1319 bool
ml_snoop_thread_is_on_core(thread_t thread)1320 ml_snoop_thread_is_on_core(thread_t thread)
1321 {
1322 unsigned int cur_cpu_num = 0;
1323 const unsigned int max_cpu_id = ml_get_max_cpu_number();
1324
1325 for (cur_cpu_num = 0; cur_cpu_num <= max_cpu_id; cur_cpu_num++) {
1326 if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr) {
1327 if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr->cpu_active_thread == thread) {
1328 return true;
1329 }
1330 }
1331 }
1332
1333 return false;
1334 }
1335
1336 int
ml_early_cpu_max_number(void)1337 ml_early_cpu_max_number(void)
1338 {
1339 assert(startup_phase >= STARTUP_SUB_TUNABLES);
1340 return ml_get_max_cpu_number();
1341 }
1342
1343 void
ml_set_max_cpus(unsigned int max_cpus __unused)1344 ml_set_max_cpus(unsigned int max_cpus __unused)
1345 {
1346 lck_mtx_lock(&max_cpus_lock);
1347 if (max_cpus_initialized != MAX_CPUS_SET) {
1348 if (max_cpus_initialized == MAX_CPUS_WAIT) {
1349 thread_wakeup((event_t) &max_cpus_initialized);
1350 }
1351 max_cpus_initialized = MAX_CPUS_SET;
1352 }
1353 lck_mtx_unlock(&max_cpus_lock);
1354 }
1355
1356 unsigned int
ml_wait_max_cpus(void)1357 ml_wait_max_cpus(void)
1358 {
1359 assert(lockdown_done);
1360 lck_mtx_lock(&max_cpus_lock);
1361 while (max_cpus_initialized != MAX_CPUS_SET) {
1362 max_cpus_initialized = MAX_CPUS_WAIT;
1363 lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
1364 }
1365 lck_mtx_unlock(&max_cpus_lock);
1366 return machine_info.max_cpus;
1367 }
1368
1369 void
ml_cpu_get_info_type(ml_cpu_info_t * ml_cpu_info,cluster_type_t cluster_type)1370 ml_cpu_get_info_type(ml_cpu_info_t * ml_cpu_info, cluster_type_t cluster_type)
1371 {
1372 cache_info_t *cpuid_cache_info;
1373
1374 cpuid_cache_info = cache_info_type(cluster_type);
1375 ml_cpu_info->vector_unit = 0;
1376 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
1377 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
1378 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
1379
1380 #if (__ARM_ARCH__ >= 8)
1381 ml_cpu_info->l2_settings = 1;
1382 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
1383 #else
1384 #error Unsupported arch
1385 #endif
1386 ml_cpu_info->l3_settings = 0;
1387 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
1388 }
1389
1390 /*
1391 * Routine: ml_cpu_get_info
1392 * Function: Fill out the ml_cpu_info_t structure with parameters associated
1393 * with the boot cluster.
1394 */
1395 void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)1396 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
1397 {
1398 ml_cpu_get_info_type(ml_cpu_info, ml_get_topology_info()->boot_cpu->cluster_type);
1399 }
1400
1401 unsigned int
ml_get_cpu_number_type(cluster_type_t cluster_type,bool logical,bool available)1402 ml_get_cpu_number_type(cluster_type_t cluster_type, bool logical, bool available)
1403 {
1404 /*
1405 * At present no supported ARM system features SMT, so the "logical"
1406 * parameter doesn't have an impact on the result.
1407 */
1408 if (logical && available) {
1409 return os_atomic_load(&cluster_type_num_active_cpus[cluster_type], relaxed);
1410 } else if (logical && !available) {
1411 return ml_get_topology_info()->cluster_type_num_cpus[cluster_type];
1412 } else if (!logical && available) {
1413 return os_atomic_load(&cluster_type_num_active_cpus[cluster_type], relaxed);
1414 } else {
1415 return ml_get_topology_info()->cluster_type_num_cpus[cluster_type];
1416 }
1417 }
1418
1419 void
ml_get_cluster_type_name(cluster_type_t cluster_type,char * name,size_t name_size)1420 ml_get_cluster_type_name(cluster_type_t cluster_type, char *name, size_t name_size)
1421 {
1422 strlcpy(name, cluster_type_names[cluster_type], name_size);
1423 }
1424
1425 unsigned int
ml_get_cluster_number_type(cluster_type_t cluster_type)1426 ml_get_cluster_number_type(cluster_type_t cluster_type)
1427 {
1428 return ml_get_topology_info()->cluster_type_num_clusters[cluster_type];
1429 }
1430
1431 unsigned int
ml_cpu_cache_sharing(unsigned int level,cluster_type_t cluster_type,bool include_all_cpu_types __unused)1432 ml_cpu_cache_sharing(unsigned int level, cluster_type_t cluster_type, bool include_all_cpu_types __unused)
1433 {
1434 unsigned int cpu_number = 0, cluster_types = 0;
1435
1436 /*
1437 * Level 0 corresponds to main memory, which is shared across all cores.
1438 */
1439 if (level == 0) {
1440 return ml_get_topology_info()->num_cpus;
1441 }
1442
1443 /*
1444 * At present no supported ARM system features more than 2 levels of caches.
1445 */
1446 if (level > 2) {
1447 return 0;
1448 }
1449
1450 /*
1451 * L1 caches are always per core.
1452 */
1453 if (level == 1) {
1454 return 1;
1455 }
1456
1457 cluster_types = (1 << cluster_type);
1458
1459 /*
1460 * Traverse clusters until we find the one(s) of the desired type(s).
1461 */
1462 for (int i = 0; i < ml_get_topology_info()->num_clusters; i++) {
1463 ml_topology_cluster_t *cluster = &ml_get_topology_info()->clusters[i];
1464 if ((1 << cluster->cluster_type) & cluster_types) {
1465 cpu_number += cluster->num_cpus;
1466 cluster_types &= ~(1 << cluster->cluster_type);
1467 if (!cluster_types) {
1468 break;
1469 }
1470 }
1471 }
1472
1473 return cpu_number;
1474 }
1475
1476 unsigned int
ml_get_cpu_types(void)1477 ml_get_cpu_types(void)
1478 {
1479 return ml_get_topology_info()->cluster_types;
1480 }
1481
1482 void
machine_conf(void)1483 machine_conf(void)
1484 {
1485 /*
1486 * This is known to be inaccurate. mem_size should always be capped at 2 GB
1487 */
1488 machine_info.memory_size = (uint32_t)mem_size;
1489
1490 // rdar://problem/58285685: Userland expects _COMM_PAGE_LOGICAL_CPUS to report
1491 // (max_cpu_id+1) rather than a literal *count* of logical CPUs.
1492 unsigned int num_cpus = ml_get_topology_info()->max_cpu_id + 1;
1493 machine_info.max_cpus = num_cpus;
1494 machine_info.physical_cpu_max = num_cpus;
1495 machine_info.logical_cpu_max = num_cpus;
1496 }
1497
1498 void
machine_init(void)1499 machine_init(void)
1500 {
1501 debug_log_init();
1502 clock_config();
1503 is_clock_configured = TRUE;
1504 if (debug_enabled) {
1505 pmap_map_globals();
1506 }
1507 ml_lockdown_init();
1508 }
1509