1 /*
2 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #ifndef _ARM_MACHINE_ROUTINES_H_
33 #define _ARM_MACHINE_ROUTINES_H_
34
35 #include <mach/mach_types.h>
36 #include <mach/vm_types.h>
37 #include <mach/boolean.h>
38 #include <kern/kern_types.h>
39 #include <pexpert/pexpert.h>
40
41 #include <sys/cdefs.h>
42 #include <sys/appleapiopts.h>
43
44 #include <stdarg.h>
45
46 #ifdef XNU_KERNEL_PRIVATE
47 #include <kern/sched_hygiene.h>
48 #include <kern/startup.h>
49 #endif /* XNU_KERNEL_PRIVATE */
50
51 __BEGIN_DECLS
52 #ifdef XNU_KERNEL_PRIVATE
53 #ifdef __arm64__
54 typedef bool (*expected_fault_handler_t)(arm_saved_state_t *);
55 #endif /* __arm64__ */
56 #endif /* XNU_KERNEL_PRIVATE */
57
58 /* Interrupt handling */
59
60 void ml_cpu_signal(unsigned int cpu_id);
61 void ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs);
62 uint64_t ml_cpu_signal_deferred_get_timer(void);
63 void ml_cpu_signal_deferred(unsigned int cpu_id);
64 void ml_cpu_signal_retract(unsigned int cpu_id);
65
66 #ifdef XNU_KERNEL_PRIVATE
67 extern void ml_wait_for_cpu_signal_to_enable(void);
68 extern void assert_ml_cpu_signal_is_enabled(bool enabled);
69 #endif /* XNU_KERNEL_PRIVATE */
70
71 /* Initialize Interrupts */
72 void ml_init_interrupt(void);
73
74 /* Get Interrupts Enabled */
75 boolean_t ml_get_interrupts_enabled(void);
76
77 /* Set Interrupts Enabled */
78 #if __has_feature(ptrauth_calls)
79 uint64_t ml_pac_safe_interrupts_disable(void);
80 void ml_pac_safe_interrupts_restore(uint64_t);
81 #endif /* __has_feature(ptrauth_calls) */
82 boolean_t ml_set_interrupts_enabled_with_debug(boolean_t enable, boolean_t debug);
83 boolean_t ml_set_interrupts_enabled(boolean_t enable);
84 boolean_t ml_early_set_interrupts_enabled(boolean_t enable);
85
86 /*
87 * Functions for disabling measurements for AppleCLPC only.
88 */
89 boolean_t sched_perfcontrol_ml_set_interrupts_without_measurement(boolean_t enable);
90 void sched_perfcontrol_abandon_preemption_disable_measurement(void);
91
92 /* Check if running at interrupt context */
93 boolean_t ml_at_interrupt_context(void);
94
95
96 /* Generate a fake interrupt */
97 void ml_cause_interrupt(void);
98
99 void siq_init(void);
100 void siq_cpu_init(void);
101
102 #ifdef XNU_KERNEL_PRIVATE
103
104 char ml_get_current_core_type(void);
105
106 /* did this interrupt context interrupt userspace? */
107 bool ml_did_interrupt_userspace(void);
108
109 #if SCHED_HYGIENE_DEBUG
110 void _ml_interrupt_masked_debug_start(uintptr_t handler_addr, int type);
111 void _ml_interrupt_masked_debug_end(void);
112 #endif /* SCHED_HYGIENE_DEBUG */
113
/*
 * Record the start of an interrupts-masked window for the scheduler's
 * interrupt hygiene instrumentation.
 *
 * handler_addr: address of the interrupt handler, recorded for attribution.
 * type:         event type forwarded to _ml_interrupt_masked_debug_start().
 *
 * Compiles to a no-op unless SCHED_HYGIENE_DEBUG is configured; even then,
 * tracking only happens when the sched_debug_interrupt_disable static
 * branch is enabled.
 *
 * NOTE: the original signature line was corrupted by an extraction
 * artifact (duplicated, number-prefixed text); restored here.
 */
static inline void
ml_interrupt_masked_debug_start(void *handler_addr, int type)
{
#if SCHED_HYGIENE_DEBUG
	if (static_if(sched_debug_interrupt_disable)) {
		_ml_interrupt_masked_debug_start((uintptr_t)handler_addr, type);
	}
#else /* !SCHED_HYGIENE_DEBUG */
#pragma unused(handler_addr, type)
#endif /* SCHED_HYGIENE_DEBUG */
}
125
/*
 * Record the end of the interrupts-masked window opened by
 * ml_interrupt_masked_debug_start().
 *
 * Compiles to a no-op unless SCHED_HYGIENE_DEBUG is configured and the
 * sched_debug_interrupt_disable static branch is enabled.
 *
 * NOTE: the original signature line was corrupted by an extraction
 * artifact (duplicated, number-prefixed text); restored here.
 */
static inline void
ml_interrupt_masked_debug_end(void)
{
#if SCHED_HYGIENE_DEBUG
	if (static_if(sched_debug_interrupt_disable)) {
		_ml_interrupt_masked_debug_end();
	}
#endif /* SCHED_HYGIENE_DEBUG */
}
135
136 #if SCHED_HYGIENE_DEBUG
137 void ml_irq_debug_start(uintptr_t handler, uintptr_t vector);
138 void ml_irq_debug_end(void);
139 void ml_irq_debug_abandon(void);
140
141 void ml_spin_debug_reset(thread_t thread);
142 void ml_spin_debug_clear(thread_t thread);
143 void ml_spin_debug_clear_self(void);
144 void ml_handle_interrupts_disabled_duration(thread_t thread);
145 void ml_handle_stackshot_interrupt_disabled_duration(thread_t thread);
146 void ml_handle_interrupt_handler_duration(thread_t thread);
147 #endif /* SCHED_HYGIENE_DEBUG */
148
149 extern bool ml_snoop_thread_is_on_core(thread_t thread);
150 extern boolean_t ml_is_quiescing(void);
151 extern void ml_set_is_quiescing(boolean_t);
152 extern uint64_t ml_get_booter_memory_size(void);
153
154 #endif /* XNU_KERNEL_PRIVATE */
155
156 /* Type for the Time Base Enable function */
157 typedef void (*time_base_enable_t)(cpu_id_t cpu_id, boolean_t enable);
158
159 #if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE)
160 /* Type for the Processor Cache Dispatch function */
161 typedef void (*cache_dispatch_t)(cpu_id_t cpu_id, unsigned int select, unsigned int param0, unsigned int param1);
162
163 typedef uint32_t (*get_decrementer_t)(void);
164 typedef void (*set_decrementer_t)(uint32_t);
165 typedef void (*fiq_handler_t)(void);
166
167 #endif
168
169 #define CacheConfig 0x00000000UL
170 #define CacheControl 0x00000001UL
171 #define CacheClean 0x00000002UL
172 #define CacheCleanRegion 0x00000003UL
173 #define CacheCleanFlush 0x00000004UL
174 #define CacheCleanFlushRegion 0x00000005UL
175 #define CacheShutdown 0x00000006UL
176
177 #define CacheControlEnable 0x00000000UL
178
179 #define CacheConfigCCSIDR 0x00000001UL
180 #define CacheConfigSize 0x00000100UL
181
182 /* Type for the Processor Idle function */
183 typedef void (*processor_idle_t)(cpu_id_t cpu_id, boolean_t enter, uint64_t *new_timeout_ticks);
184
185 /* Type for the Idle Tickle function */
186 typedef void (*idle_tickle_t)(void);
187
188 /* Type for the Idle Timer function */
189 typedef void (*idle_timer_t)(void *refcon, uint64_t *new_timeout_ticks);
190
/* Type for the IPI Handler */
192 typedef void (*ipi_handler_t)(void);
193
/* Type for the Lockdown Handler */
195 typedef void (*lockdown_handler_t)(void *);
196
197 /* Type for the Platform specific Error Handler */
198 typedef void (*platform_error_handler_t)(void *refcon, vm_offset_t fault_addr);
199
200 /*
201 * The exception callback (ex_cb) module is obsolete. Some definitions related
202 * to ex_cb were exported through the SDK, and are only left here for historical
203 * reasons.
204 */
205
206 /* Unused. Left for historical reasons. */
typedef enum{
	EXCB_CLASS_ILLEGAL_INSTR_SET,
#ifdef CONFIG_XNUPOST
	/* Classes reserved for XNU power-on self tests. */
	EXCB_CLASS_TEST1,
	EXCB_CLASS_TEST2,
	EXCB_CLASS_TEST3,
#endif
	EXCB_CLASS_MAX  /* number of classes; not itself a valid class */
}
ex_cb_class_t;
217
218 /* Unused. Left for historical reasons. */
typedef enum{
	EXCB_ACTION_RERUN,  /* historically: re-run the faulting instruction */
	EXCB_ACTION_NONE,   /* historically: take no special action */
#ifdef CONFIG_XNUPOST
	EXCB_ACTION_TEST_FAIL,  /* reserved for XNU power-on self tests */
#endif
}
ex_cb_action_t;
227
228 /* Unused. Left for historical reasons. */
typedef struct{
	vm_offset_t far;  /* fault address ("far") associated with the exception */
}
ex_cb_state_t;
233
234 /* Unused. Left for historical reasons. */
/* Signature of a (historical) exception callback: class, registration
 * refcon, and per-exception state. */
typedef ex_cb_action_t (*ex_cb_t) (
	ex_cb_class_t           cb_class,
	void                    *refcon,
	const ex_cb_state_t     *state
	);
240
241 /*
242 * This function is unimplemented. Its definition is left for historical
243 * reasons.
244 */
245 kern_return_t ex_cb_register(
246 ex_cb_class_t cb_class,
247 ex_cb_t cb,
248 void *refcon );
249
250 /*
251 * This function is unimplemented. Its definition is left for historical
252 * reasons.
253 */
254 ex_cb_action_t ex_cb_invoke(
255 ex_cb_class_t cb_class,
256 vm_offset_t far);
257
/* Topology class of a CPU cluster. */
typedef enum {
	CLUSTER_TYPE_INVALID = -1,  /* unknown / not a valid cluster type */
	CLUSTER_TYPE_SMP = 0,       /* symmetric (single-type) cluster */
	CLUSTER_TYPE_E = 1,         /* "E" (efficiency) cluster */
	CLUSTER_TYPE_P = 2,         /* "P" (performance) cluster */
	MAX_CPU_TYPES,              /* count of valid cluster types */
} cluster_type_t;
265
266 #ifdef XNU_KERNEL_PRIVATE
267 void ml_parse_cpu_topology(void);
268 #endif /* XNU_KERNEL_PRIVATE */
269
270 unsigned int ml_get_cpu_count(void);
271
272 unsigned int ml_get_cpu_number_type(cluster_type_t cluster_type, bool logical, bool available);
273
274 unsigned int ml_get_cluster_number_type(cluster_type_t cluster_type);
275
276 unsigned int ml_cpu_cache_sharing(unsigned int level, cluster_type_t cluster_type, bool include_all_cpu_types);
277
278 unsigned int ml_get_cpu_types(void);
279
280 int ml_get_boot_cpu_number(void);
281
282 int ml_get_cpu_number(uint32_t phys_id);
283
284 unsigned int ml_get_cpu_number_local(void);
285
286 int ml_get_cluster_number(uint32_t phys_id);
287
288 int ml_get_max_cpu_number(void);
289
290 int ml_get_max_cluster_number(void);
291
292 /*
293 * Return the id of a cluster's first cpu.
294 */
295 unsigned int ml_get_first_cpu_id(unsigned int cluster_id);
296
297 /*
298 * Return the die id of a cluster.
299 */
300 unsigned int ml_get_die_id(unsigned int cluster_id);
301
302 /*
303 * Return the index of a cluster in its die.
304 */
305 unsigned int ml_get_die_cluster_id(unsigned int cluster_id);
306
307 /*
308 * Return the highest die id of the system.
309 */
310 unsigned int ml_get_max_die_id(void);
311
312 #ifdef __arm64__
313 int ml_get_cluster_number_local(void);
314 #endif /* __arm64__ */
315
316 /* Struct for ml_cpu_get_info */
struct ml_cpu_info {
	unsigned long vector_unit;      /* vector unit availability */
	unsigned long cache_line_size;  /* cache line size */
	unsigned long l1_icache_size;   /* L1 instruction cache size */
	unsigned long l1_dcache_size;   /* L1 data cache size */
	unsigned long l2_settings;      /* L2 cache settings */
	unsigned long l2_cache_size;    /* L2 cache size */
	unsigned long l3_settings;      /* L3 cache settings */
	unsigned long l3_cache_size;    /* L3 cache size */
};
typedef struct ml_cpu_info ml_cpu_info_t;
328
329 cluster_type_t ml_get_boot_cluster_type(void);
330
331 #ifdef KERNEL_PRIVATE
332 #include "cpu_topology.h"
333 #endif /* KERNEL_PRIVATE */
334
335 /*!
336 * @function ml_map_cpu_pio
337 * @brief Maps per-CPU and per-cluster PIO registers found in EDT. This needs to be
338 * called after arm_vm_init() so it can't be part of ml_parse_cpu_topology().
339 */
340 void ml_map_cpu_pio(void);
341
342 /* Struct for ml_processor_register */
struct ml_processor_info {
	cpu_id_t cpu_id;                      /* opaque CPU identifier from the platform kext */
	vm_offset_t start_paddr;              /* physical start address for the CPU */
	boolean_t supports_nap;               /* whether the CPU supports nap (low-power idle) */
	void *platform_cache_dispatch;        /* platform cache maintenance hook (cache_dispatch_t — TODO confirm) */
	time_base_enable_t time_base_enable;  /* callback to enable/disable the time base */
	processor_idle_t processor_idle;      /* platform idle enter/exit callback */
	idle_tickle_t *idle_tickle;           /* idle tickle routine (pointer — presumably filled in by xnu; verify) */
	idle_timer_t idle_timer;              /* platform idle timer callback */
	void *idle_timer_refcon;              /* refcon passed back to idle_timer */
	vm_offset_t powergate_stub_addr;      /* address of the powergate stub */
	uint32_t powergate_stub_length;       /* length of the powergate stub */
	uint32_t powergate_latency;           /* powergating latency */
	platform_error_handler_t platform_error_handler;  /* platform-specific error handler */
	uint64_t regmap_paddr;                /* physical address of the CPU register map */
	uint32_t phys_id;                     /* physical CPU id */
	uint32_t log_id;                      /* logical CPU id */
	uint32_t l2_access_penalty; /* unused */
	uint32_t cluster_id;                  /* id of the cluster containing this CPU */

	cluster_type_t cluster_type;          /* cluster class (see cluster_type_t) */

	uint32_t l2_cache_id;                 /* id of the CPU's L2 cache */
	uint32_t l2_cache_size;               /* size of the CPU's L2 cache */
	uint32_t l3_cache_id;                 /* id of the CPU's L3 cache */
	uint32_t l3_cache_size;               /* size of the CPU's L3 cache */
};
typedef struct ml_processor_info ml_processor_info_t;
371
372 #if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE)
373 /* Struct for ml_init_timebase */
/* Timebase/decrementer operations supplied to ml_init_timebase(). */
struct tbd_ops {
	fiq_handler_t tbd_fiq_handler;          /* FIQ handler */
	get_decrementer_t tbd_get_decrementer;  /* read the decrementer */
	set_decrementer_t tbd_set_decrementer;  /* program the decrementer */
};
typedef struct tbd_ops        *tbd_ops_t;
typedef struct tbd_ops        tbd_ops_data_t;
381 #endif
382
383
384 /*!
385 * @function ml_processor_register
386 *
387 * @abstract callback from platform kext to register processor
388 *
389 * @discussion This function is called by the platform kext when a processor is
390 * being registered. This is called while running on the CPU itself, as part of
391 * its initialization.
392 *
393 * @param ml_processor_info provides machine-specific information about the
394 * processor to xnu.
395 *
396 * @param processor is set as an out-parameter to an opaque handle that should
397 * be used by the platform kext when referring to this processor in the future.
398 *
399 * @param ipi_handler is set as an out-parameter to the function that should be
400 * registered as the IPI handler.
401 *
402 * @param pmi_handler is set as an out-parameter to the function that should be
403 * registered as the PMI handler.
404 *
405 * @returns KERN_SUCCESS on success and an error code, otherwise.
406 */
407 kern_return_t ml_processor_register(ml_processor_info_t *ml_processor_info,
408 processor_t *processor, ipi_handler_t *ipi_handler,
409 perfmon_interrupt_handler_func *pmi_handler);
410
411 /* Register a lockdown handler */
412 kern_return_t ml_lockdown_handler_register(lockdown_handler_t, void *);
413
/* Register an M$ (memory cache) flush callback */
415 typedef kern_return_t (*mcache_flush_function)(void *service);
416 kern_return_t ml_mcache_flush_callback_register(mcache_flush_function func, void *service);
417 kern_return_t ml_mcache_flush(void);
418
419 #if XNU_KERNEL_PRIVATE
420
421 void ml_lockdown_init(void);
422
423 /* Machine layer routine for intercepting panics */
424 __printflike(1, 0)
425 void ml_panic_trap_to_debugger(const char *panic_format_str,
426 va_list *panic_args,
427 unsigned int reason,
428 void *ctx,
429 uint64_t panic_options_mask,
430 unsigned long panic_caller,
431 const char *panic_initiator);
432
433 /* Initialize Interrupts */
434 void ml_install_interrupt_handler(
435 void *nub,
436 int source,
437 void *target,
438 IOInterruptHandler handler,
439 void *refCon);
440
441 #endif /* XNU_KERNEL_PRIVATE */
442
443 vm_offset_t
444 ml_static_vtop(
445 vm_offset_t);
446
447 kern_return_t
448 ml_static_verify_page_protections(
449 uint64_t base, uint64_t size, vm_prot_t prot);
450
451 vm_offset_t
452 ml_static_ptovirt(
453 vm_offset_t);
454
455 /* Offset required to obtain absolute time value from tick counter */
456 uint64_t ml_get_abstime_offset(void);
457
458 /* Offset required to obtain continuous time value from tick counter */
459 uint64_t ml_get_conttime_offset(void);
460
461 #ifdef __APPLE_API_UNSTABLE
462 /* PCI config cycle probing */
463 boolean_t ml_probe_read(
464 vm_offset_t paddr,
465 unsigned int *val);
466 boolean_t ml_probe_read_64(
467 addr64_t paddr,
468 unsigned int *val);
469
470 /* Read physical address byte */
471 unsigned int ml_phys_read_byte(
472 vm_offset_t paddr);
473 unsigned int ml_phys_read_byte_64(
474 addr64_t paddr);
475
476 /* Read physical address half word */
477 unsigned int ml_phys_read_half(
478 vm_offset_t paddr);
479 unsigned int ml_phys_read_half_64(
480 addr64_t paddr);
481
/* Read physical address word */
483 unsigned int ml_phys_read(
484 vm_offset_t paddr);
485 unsigned int ml_phys_read_64(
486 addr64_t paddr);
487 unsigned int ml_phys_read_word(
488 vm_offset_t paddr);
489 unsigned int ml_phys_read_word_64(
490 addr64_t paddr);
491
492 /* Read physical address double word */
493 unsigned long long ml_phys_read_double(
494 vm_offset_t paddr);
495 unsigned long long ml_phys_read_double_64(
496 addr64_t paddr);
497
498 /* Write physical address byte */
499 void ml_phys_write_byte(
500 vm_offset_t paddr, unsigned int data);
501 void ml_phys_write_byte_64(
502 addr64_t paddr, unsigned int data);
503
504 /* Write physical address half word */
505 void ml_phys_write_half(
506 vm_offset_t paddr, unsigned int data);
507 void ml_phys_write_half_64(
508 addr64_t paddr, unsigned int data);
509
510 /* Write physical address word */
511 void ml_phys_write(
512 vm_offset_t paddr, unsigned int data);
513 void ml_phys_write_64(
514 addr64_t paddr, unsigned int data);
515 void ml_phys_write_word(
516 vm_offset_t paddr, unsigned int data);
517 void ml_phys_write_word_64(
518 addr64_t paddr, unsigned int data);
519
520 /* Write physical address double word */
521 void ml_phys_write_double(
522 vm_offset_t paddr, unsigned long long data);
523 void ml_phys_write_double_64(
524 addr64_t paddr, unsigned long long data);
525
526 #if defined(__SIZEOF_INT128__) && APPLE_ARM64_ARCH_FAMILY
527 /*
528 * Not all dependent projects consuming `machine_routines.h` are built using
529 * toolchains that support 128-bit integers.
530 */
531 #define BUILD_QUAD_WORD_FUNCS 1
532 #else
533 #define BUILD_QUAD_WORD_FUNCS 0
534 #endif /* defined(__SIZEOF_INT128__) && APPLE_ARM64_ARCH_FAMILY */
535
536 #if BUILD_QUAD_WORD_FUNCS
537 /*
538 * Not all dependent projects have their own typedef of `uint128_t` at the
539 * time they consume `machine_routines.h`.
540 */
541 typedef unsigned __int128 uint128_t;
542
543 /* Read physical address quad word */
544 uint128_t ml_phys_read_quad(
545 vm_offset_t paddr);
546 uint128_t ml_phys_read_quad_64(
547 addr64_t paddr);
548
549 /* Write physical address quad word */
550 void ml_phys_write_quad(
551 vm_offset_t paddr, uint128_t data);
552 void ml_phys_write_quad_64(
553 addr64_t paddr, uint128_t data);
554 #endif /* BUILD_QUAD_WORD_FUNCS */
555
556 void ml_static_mfree(
557 vm_offset_t,
558 vm_size_t);
559
560 kern_return_t
561 ml_static_protect(
562 vm_offset_t start,
563 vm_size_t size,
564 vm_prot_t new_prot);
565
566 /* virtual to physical on wired pages */
567 vm_offset_t ml_vtophys(
568 vm_offset_t vaddr);
569
570 /* Get processor cache info */
571 void ml_cpu_get_info(ml_cpu_info_t *ml_cpu_info);
572 void ml_cpu_get_info_type(ml_cpu_info_t * ml_cpu_info, cluster_type_t cluster_type);
573
574 #endif /* __APPLE_API_UNSTABLE */
575
576 typedef int ml_page_protection_t;
577
578 /* Return the type of page protection supported */
579 ml_page_protection_t ml_page_protection_type(void);
580
581 #ifdef __APPLE_API_PRIVATE
582 #ifdef XNU_KERNEL_PRIVATE
583 vm_size_t ml_nofault_copy(
584 vm_offset_t virtsrc,
585 vm_offset_t virtdst,
586 vm_size_t size);
587 boolean_t ml_validate_nofault(
588 vm_offset_t virtsrc, vm_size_t size);
589 #endif /* XNU_KERNEL_PRIVATE */
590 #if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE)
591 /* IO memory map services */
592
593 extern vm_offset_t io_map(
594 vm_map_offset_t phys_addr,
595 vm_size_t size,
596 unsigned int flags,
597 vm_prot_t prot,
598 bool unmappable);
599
600 /* Map memory map IO space */
601 vm_offset_t ml_io_map(
602 vm_offset_t phys_addr,
603 vm_size_t size);
604
605 vm_offset_t ml_io_map_wcomb(
606 vm_offset_t phys_addr,
607 vm_size_t size);
608
609 vm_offset_t ml_io_map_unmappable(
610 vm_offset_t phys_addr,
611 vm_size_t size,
612 uint32_t flags);
613
614 vm_offset_t ml_io_map_with_prot(
615 vm_offset_t phys_addr,
616 vm_size_t size,
617 vm_prot_t prot);
618
619 void ml_io_unmap(
620 vm_offset_t addr,
621 vm_size_t sz);
622
623 void ml_get_bouncepool_info(
624 vm_offset_t *phys_addr,
625 vm_size_t *size);
626
627 vm_map_address_t ml_map_high_window(
628 vm_offset_t phys_addr,
629 vm_size_t len);
630
631 void ml_init_timebase(
632 void *args,
633 tbd_ops_t tbd_funcs,
634 vm_offset_t int_address,
635 vm_offset_t int_value);
636
637 uint64_t ml_get_timebase(void);
638
639 #if MACH_KERNEL_PRIVATE
640 void ml_memory_to_timebase_fence(void);
641 void ml_timebase_to_memory_fence(void);
642 #endif /* MACH_KERNEL_PRIVATE */
643
644 uint64_t ml_get_speculative_timebase(void);
645
646 uint64_t ml_get_timebase_entropy(void);
647
648 boolean_t ml_delay_should_spin(uint64_t interval);
649
650 void ml_delay_on_yield(void);
651
652 uint32_t ml_get_decrementer(void);
653
654 #include <machine/config.h>
655
656 uint64_t ml_get_hwclock(void);
657
658 uint64_t ml_get_hwclock_speculative(void);
659
660 #ifdef __arm64__
661 boolean_t ml_get_timer_pending(void);
662 #endif
663
664 void platform_syscall(
665 struct arm_saved_state *);
666
667 void ml_set_decrementer(
668 uint32_t dec_value);
669
670 boolean_t is_user_contex(
671 void);
672
673 void ml_init_arm_debug_interface(void *args, vm_offset_t virt_address);
674
675 /* These calls are only valid if __ARM_USER_PROTECT__ is defined */
676 uintptr_t arm_user_protect_begin(
677 thread_t thread);
678
679 void arm_user_protect_end(
680 thread_t thread,
681 uintptr_t up,
682 boolean_t disable_interrupts);
683
684 #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */
685
686 /* Zero bytes starting at a physical address */
687 void bzero_phys(
688 addr64_t phys_address,
689 vm_size_t length);
690
691 void bzero_phys_nc(addr64_t src64, vm_size_t bytes);
692
693 void bzero_phys_with_options(addr64_t src, vm_size_t bytes, int options);
694
695 #if MACH_KERNEL_PRIVATE
696 #ifdef __arm64__
697 /* Pattern-fill buffer with zeros or a 32-bit pattern;
698 * target must be 128-byte aligned and sized a multiple of 128
699 * Both variants emit stores with non-temporal properties.
700 */
701 void fill32_dczva(addr64_t, vm_size_t);
702 void fill32_nt(addr64_t, vm_size_t, uint32_t);
703 bool cpu_interrupt_is_pending(void);
704
705 #endif // __arm64__
706 #endif // MACH_KERNEL_PRIVATE
707
708 void ml_thread_policy(
709 thread_t thread,
710 unsigned policy_id,
711 unsigned policy_info);
712
713 #define MACHINE_GROUP 0x00000001
714 #define MACHINE_NETWORK_GROUP 0x10000000
715 #define MACHINE_NETWORK_WORKLOOP 0x00000001
716 #define MACHINE_NETWORK_NETISR 0x00000002
717
718 /* Set the maximum number of CPUs */
719 void ml_set_max_cpus(
720 unsigned int max_cpus);
721
722 /* Return the maximum number of CPUs set by ml_set_max_cpus(), waiting if necessary */
723 unsigned int ml_wait_max_cpus(
724 void);
725
726 /* Return the maximum memory size */
727 unsigned int ml_get_machine_mem(void);
728
729 #ifdef XNU_KERNEL_PRIVATE
730 /* Return max offset */
731 vm_map_offset_t ml_get_max_offset(
732 boolean_t is64,
733 unsigned int option);
734 #define MACHINE_MAX_OFFSET_DEFAULT 0x01
735 #define MACHINE_MAX_OFFSET_MIN 0x02
736 #define MACHINE_MAX_OFFSET_MAX 0x04
737 #define MACHINE_MAX_OFFSET_DEVICE 0x08
738 #endif
739
740 extern void ml_cpu_init_completed(void);
741 extern void ml_cpu_up(void);
742 extern void ml_cpu_down(void);
743 extern int ml_find_next_up_processor(void);
744
745 /*
746 * The update to CPU counts needs to be separate from other actions
747 * in ml_cpu_up() and ml_cpu_down()
748 * because we don't update the counts when CLPC causes temporary
749 * cluster powerdown events, as these must be transparent to the user.
750 */
751 extern void ml_cpu_up_update_counts(int cpu_id);
752 extern void ml_cpu_down_update_counts(int cpu_id);
753 extern void ml_arm_sleep(void);
754
755 extern uint64_t ml_get_wake_timebase(void);
756 extern uint64_t ml_get_conttime_wake_time(void);
757
758 /* Time since the system was reset (as part of boot/wake) */
759 uint64_t ml_get_time_since_reset(void);
760
761 /*
762 * Called by ApplePMGR to set wake time. Units and epoch are identical
763 * to mach_continuous_time(). Has no effect on !HAS_CONTINUOUS_HWCLOCK
764 * chips. If wake_time == UINT64_MAX, that means the wake time is
765 * unknown and calls to ml_get_time_since_reset() will return UINT64_MAX.
766 */
767 void ml_set_reset_time(uint64_t wake_time);
768
769 #ifdef XNU_KERNEL_PRIVATE
770 /* Just a stub on ARM */
771 extern kern_return_t ml_interrupt_prewarm(uint64_t deadline);
772 #define TCOAL_DEBUG(x, a, b, c, d, e) do { } while(0)
773 #endif /* XNU_KERNEL_PRIVATE */
774
775 /* Bytes available on current stack */
776 vm_offset_t ml_stack_remaining(void);
777
778 #ifdef MACH_KERNEL_PRIVATE
779 uint32_t get_fpscr(void);
780 void set_fpscr(uint32_t);
781 void machine_conf(void);
782 void machine_lockdown(void);
783
784 #ifdef __arm64__
785 unsigned long update_mdscr(unsigned long clear, unsigned long set);
786 #endif /* __arm64__ */
787
788 extern void arm_debug_set_cp14(arm_debug_state_t *debug_state);
789 extern void fiq_context_init(boolean_t enable_fiq);
790
791 extern void reenable_async_aborts(void);
792
793 #ifdef __arm64__
794 uint64_t ml_cluster_wfe_timeout(uint32_t wfe_cluster_id);
795 #endif
796
797 #ifdef MONITOR
798 #define MONITOR_SET_ENTRY 0x800 /* Set kernel entry point from monitor */
799 #define MONITOR_LOCKDOWN 0x801 /* Enforce kernel text/rodata integrity */
800 unsigned long monitor_call(uintptr_t callnum, uintptr_t arg1,
801 uintptr_t arg2, uintptr_t arg3);
802 #endif /* MONITOR */
803
804 #if __ARM_KERNEL_PROTECT__
805 extern void set_vbar_el1(uint64_t);
806 #endif /* __ARM_KERNEL_PROTECT__ */
807
808 #if HAS_MTE
809 extern void arm_mte_tag_generator_init(bool is_boot_cpu);
810 #endif
811
812 #endif /* MACH_KERNEL_PRIVATE */
813
814 extern uint32_t arm_debug_read_dscr(void);
815
816 extern int set_be_bit(void);
817 extern int clr_be_bit(void);
818 extern int be_tracing(void);
819
/* Please note that cpu_broadcast_xcall is not as simple as you would like it to be.
821 * It will sometimes put the calling thread to sleep, and it is up to your callback
822 * to wake it up as needed, where "as needed" is defined as "all other CPUs have
823 * called the broadcast func". Look around the kernel for examples, or instead use
824 * cpu_broadcast_xcall_simple() which does indeed act like you would expect, given
825 * the prototype.
826 */
827 typedef void (*broadcastFunc) (void *);
828 unsigned int cpu_broadcast_xcall(uint32_t *, boolean_t, broadcastFunc, void *);
829 unsigned int cpu_broadcast_xcall_simple(boolean_t, broadcastFunc, void *);
830 __result_use_check kern_return_t cpu_xcall(int, broadcastFunc, void *);
831 __result_use_check kern_return_t cpu_immediate_xcall(int, broadcastFunc, void *);
832
833 #ifdef KERNEL_PRIVATE
834
835 /* Interface to be used by the perf. controller to register a callback, in a
836 * single-threaded fashion. The callback will receive notifications of
837 * processor performance quality-of-service changes from the scheduler.
838 */
839
840 #ifdef __arm64__
841 typedef void (*cpu_qos_update_t)(int throughput_qos, uint64_t qos_param1, uint64_t qos_param2);
842 void cpu_qos_update_register(cpu_qos_update_t);
843 #endif /* __arm64__ */
844
/*
 * Data passed to the perf. controller's "oncore" callout for the thread
 * that is going on-core.
 */
struct going_on_core {
	uint64_t thread_id;        /* id of the thread going on-core */
	uint16_t qos_class;        /* thread QoS class */
	uint16_t urgency; /* XCPM compatibility */
	uint32_t is_32_bit : 1; /* uses 32-bit ISA/register state in userspace (which may differ from address space size) */
	uint32_t is_kernel_thread : 1;
	uint64_t thread_group_id;  /* id of the thread's thread group */
	void     *thread_group_data;  /* opaque thread group data */
	uint64_t scheduling_latency; /* absolute time between when thread was made runnable and this ctx switch */
	uint64_t start_time;       /* timestamp of going on-core */
	uint64_t scheduling_latency_at_same_basepri;
	uint32_t energy_estimate_nj; /* return: In nanojoules */
	/* smaller of the time between last change to base priority and ctx switch and scheduling_latency */
};
typedef struct going_on_core *going_on_core_t;
860
/*
 * Data passed to the perf. controller's "offcore" callout for the thread
 * that is leaving the CPU.
 */
struct going_off_core {
	uint64_t thread_id;          /* id of the thread going off-core */
	uint32_t energy_estimate_nj; /* return: In nanojoules */
	uint32_t reserved;           /* padding/reserved */
	uint64_t end_time;           /* timestamp of going off-core */
	uint64_t thread_group_id;    /* id of the thread's thread group */
	void     *thread_group_data; /* opaque thread group data */
};
typedef struct going_off_core *going_off_core_t;
870
/* Data describing a thread group, passed to the thread-group callouts. */
struct thread_group_data {
	uint64_t thread_group_id;    /* id of the thread group */
	void     *thread_group_data; /* opaque per-group data pointer */
	uint32_t thread_group_size;  /* size of the thread group */
	uint32_t thread_group_flags; /* thread group flags */
};
typedef struct thread_group_data *thread_group_data_t;
878
/* Per-urgency-class maximum runnable (but not running) thread latencies. */
struct perfcontrol_max_runnable_latency {
	uint64_t max_scheduling_latencies[4 /* THREAD_URGENCY_MAX */];
};
typedef struct perfcontrol_max_runnable_latency *perfcontrol_max_runnable_latency_t;
883
/* Work interval information forwarded from userland to CLPC. */
struct perfcontrol_work_interval {
	uint64_t thread_id;        /* id of the reporting thread */
	uint16_t qos_class;        /* thread QoS class */
	uint16_t urgency;          /* thread urgency */
	uint32_t flags; // notify
	uint64_t work_interval_id; /* id of the work interval */
	uint64_t start;            /* interval start timestamp */
	uint64_t finish;           /* interval finish timestamp */
	uint64_t deadline;         /* interval deadline */
	uint64_t next_start;       /* start of the next interval */
	uint64_t thread_group_id;  /* id of the thread's thread group */
	void     *thread_group_data; /* opaque thread group data */
	uint32_t create_flags;     /* flags used at interval creation */
};
typedef struct perfcontrol_work_interval *perfcontrol_work_interval_t;
899
/* Operation selector for a work interval instance callout. */
typedef enum {
	WORK_INTERVAL_START,      /* start an instance */
	WORK_INTERVAL_UPDATE,     /* update an in-flight instance */
	WORK_INTERVAL_FINISH,     /* finish an instance */
	WORK_INTERVAL_CREATE,     /* create a work interval */
	WORK_INTERVAL_DEALLOCATE, /* deallocate a work interval */
} work_interval_ctl_t;
907
/* Per-instance work interval data (see sched_perfcontrol_work_interval_ctl_t). */
struct perfcontrol_work_interval_instance {
	work_interval_ctl_t ctl;   /* which operation this callout represents */
	uint32_t create_flags;     /* flags used at interval creation */
	uint64_t complexity;       /* optional complexity estimate */
	uint64_t thread_id;        /* id of the reporting thread */
	uint64_t work_interval_id; /* id of the owning work interval */
	uint64_t instance_id; /* out: start, in: update/finish */
	uint64_t start;            /* instance start timestamp */
	uint64_t finish;           /* instance finish timestamp */
	uint64_t deadline;         /* instance deadline */
	uint64_t thread_group_id;  /* id of the thread's thread group */
	void     *thread_group_data; /* opaque thread group data */
};
typedef struct perfcontrol_work_interval_instance *perfcontrol_work_interval_instance_t;
922
923 /*
924 * Structure to export per-CPU counters as part of the CLPC callout.
925 * Contains only the fixed CPU counters (instructions and cycles); CLPC
926 * would call back into XNU to get the configurable counters if needed.
927 */
struct perfcontrol_cpu_counters {
	uint64_t instructions; /* fixed counter: instruction count */
	uint64_t cycles;       /* fixed counter: cycle count */
};
932
/* Bitmask of per-thread flags CLPC may update via the thread-data callout. */
__options_decl(perfcontrol_thread_flags_mask_t, uint64_t, {
	PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_RR           = 1 << 0, /* cluster shared-resource round-robin */
	PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_NATIVE_FIRST = 1 << 1, /* cluster shared-resource native-first */
});
937
938
939 /*
940 * Structure used to pass information about a thread to CLPC
941 */
struct perfcontrol_thread_data {
	/*
	 * Energy estimate (return value)
	 * The field is populated by CLPC and used to update the
	 * energy estimate of the thread
	 */
	uint32_t            energy_estimate_nj;
	/* Perfcontrol class for thread */
	perfcontrol_class_t perfctl_class;
	/* Thread ID for the thread */
	uint64_t            thread_id;
	/* Thread Group ID */
	uint64_t            thread_group_id;
	/*
	 * Scheduling latency for threads at the same base priority.
	 * Calculated by the scheduler and passed into CLPC. The field is
	 * populated only in the thread_data structure for the thread
	 * going on-core.
	 */
	uint64_t            scheduling_latency_at_same_basepri;
	/* Thread Group data pointer */
	void                *thread_group_data;
	/* perfctl state pointer */
	void                *perfctl_state;
	/* Bitmask to indicate which thread flags have been updated as part of the callout */
	perfcontrol_thread_flags_mask_t thread_flags_mask;
	/* Actual values for the flags that are getting updated in the callout */
	perfcontrol_thread_flags_mask_t thread_flags;
};
971
972 /*
973 * All callouts from the scheduler are executed with interrupts
974 * disabled. Callouts should be implemented in C with minimal
975 * abstractions, and only use KPI exported by the mach/libkern
976 * symbolset, restricted to routines like spinlocks and atomic
977 * operations and scheduler routines as noted below. Spinlocks that
978 * are used to synchronize data in the perfcontrol_state_t should only
979 * ever be acquired with interrupts disabled, to avoid deadlocks where
980 * an quantum expiration timer interrupt attempts to perform a callout
981 * that attempts to lock a spinlock that is already held.
982 */
983
984 /*
985 * When a processor is switching between two threads (after the
986 * scheduler has chosen a new thread), the low-level platform layer
987 * will call this routine, which should perform required timestamps,
988 * MMIO register reads, or other state switching. No scheduler locks
989 * are held during this callout.
990 *
991 * This function is called with interrupts ENABLED.
992 */
993 typedef void (*sched_perfcontrol_context_switch_t)(perfcontrol_state_t, perfcontrol_state_t);
994
995 /*
996 * Once the processor has switched to the new thread, the offcore
997 * callout will indicate the old thread that is no longer being
998 * run. The thread's scheduler lock is held, so it will not begin
999 * running on another processor (in the case of preemption where it
1000 * remains runnable) until it completes. If the "thread_terminating"
1001 * boolean is TRUE, this will be the last callout for this thread_id.
1002 */
1003 typedef void (*sched_perfcontrol_offcore_t)(perfcontrol_state_t, going_off_core_t /* populated by callee */, boolean_t);
1004
1005 /*
1006 * After the offcore callout and after the old thread can potentially
1007 * start running on another processor, the oncore callout will be
1008 * called with the thread's scheduler lock held. The oncore callout is
1009 * also called any time one of the parameters in the going_on_core_t
1010 * structure changes, like priority/QoS changes, and quantum
1011 * expiration, so the callout must not assume callouts are paired with
1012 * offcore callouts.
1013 */
1014 typedef void (*sched_perfcontrol_oncore_t)(perfcontrol_state_t, going_on_core_t);
1015
1016 /*
1017 * Periodically (on hundreds of ms scale), the scheduler will perform
1018 * maintenance and report the maximum latency for runnable (but not currently
1019 * running) threads for each urgency class.
1020 */
1021 typedef void (*sched_perfcontrol_max_runnable_latency_t)(perfcontrol_max_runnable_latency_t);
1022
1023 /*
1024 * When the kernel receives information about work intervals from userland,
1025 * it is passed along using this callback. No locks are held, although the state
1026 * object will not go away during the callout.
1027 */
1028 typedef void (*sched_perfcontrol_work_interval_notify_t)(perfcontrol_state_t, perfcontrol_work_interval_t);
1029
1030 /*
1031 * Start, update and finish work interval instance with optional complexity estimate.
1032 */
1033 typedef void (*sched_perfcontrol_work_interval_ctl_t)(perfcontrol_state_t, perfcontrol_work_interval_instance_t);
1034
1035 /*
1036 * These callbacks are used when thread groups are added, removed or properties
1037 * updated.
1038 * No blocking allocations (or anything else blocking) are allowed inside these
1039 * callbacks. No locks allowed in these callbacks as well since the kernel might
1040 * be holding the thread/task locks.
1041 */
1042 typedef void (*sched_perfcontrol_thread_group_init_t)(thread_group_data_t);
1043 typedef void (*sched_perfcontrol_thread_group_deinit_t)(thread_group_data_t);
1044 typedef void (*sched_perfcontrol_thread_group_flags_update_t)(thread_group_data_t);
1045
1046 /*
1047 * Sometime after the timeout set by sched_perfcontrol_update_callback_deadline has passed,
1048 * this function will be called, passing the timeout deadline that was previously armed as an argument.
1049 *
1050 * This is called inside context-switch/quantum-interrupt context and must follow the safety rules for that context.
1051 */
1052 typedef void (*sched_perfcontrol_deadline_passed_t)(uint64_t deadline);
1053
1054 /*
1055 * Context Switch Callout
1056 *
1057 * Parameters:
1058 * event - The perfcontrol_event for this callout
1059 * cpu_id - The CPU doing the context switch
1060 * timestamp - The timestamp for the context switch
1061 * flags - Flags for other relevant information
1062 * offcore - perfcontrol_data structure for thread going off-core
1063 * oncore - perfcontrol_data structure for thread going on-core
1064 * cpu_counters - perfcontrol_cpu_counters for the CPU doing the switch
1065 * timeout_ticks - Per core timer timeout
1066 */
1067 typedef void (*sched_perfcontrol_csw_t)(
1068 perfcontrol_event event, uint32_t cpu_id, uint64_t timestamp, uint32_t flags,
1069 struct perfcontrol_thread_data *offcore, struct perfcontrol_thread_data *oncore,
1070 struct perfcontrol_cpu_counters *cpu_counters, uint64_t *timeout_ticks);
1071
1072
1073 /*
1074 * Thread State Update Callout
1075 *
1076 * Parameters:
1077 * event - The perfcontrol_event for this callout
1078 * cpu_id - The CPU doing the state update
1079 * timestamp - The timestamp for the state update
1080 * flags - Flags for other relevant information
1081 * thr_data - perfcontrol_data structure for the thread being updated
1082 * timeout_ticks - Per core timer timeout
1083 */
1084 typedef void (*sched_perfcontrol_state_update_t)(
1085 perfcontrol_event event, uint32_t cpu_id, uint64_t timestamp, uint32_t flags,
1086 struct perfcontrol_thread_data *thr_data, uint64_t *timeout_ticks);
1087
1088 /*
1089 * Thread Group Blocking Relationship Callout
1090 *
1091 * Parameters:
1092 * blocked_tg - Thread group blocking on progress of another thread group
1093 * blocking_tg - Thread group blocking progress of another thread group
1094 * flags - Flags for other relevant information
1095 * blocked_thr_state - Per-thread perfcontrol state for blocked thread
1096 */
1097 typedef void (*sched_perfcontrol_thread_group_blocked_t)(
1098 thread_group_data_t blocked_tg, thread_group_data_t blocking_tg, uint32_t flags, perfcontrol_state_t blocked_thr_state);
1099
1100 /*
1101 * Thread Group Unblocking Callout
1102 *
1103 * Parameters:
1104 * unblocked_tg - Thread group being unblocked from making forward progress
1105 * unblocking_tg - Thread group unblocking progress of another thread group
1106 * flags - Flags for other relevant information
1107 * unblocked_thr_state - Per-thread perfcontrol state for unblocked thread
1108 */
1109 typedef void (*sched_perfcontrol_thread_group_unblocked_t)(
1110 thread_group_data_t unblocked_tg, thread_group_data_t unblocking_tg, uint32_t flags, perfcontrol_state_t unblocked_thr_state);
1111
1112 /*
1113 * Per core timer expired callout
1114 *
1115 * Parameters:
1116 * now - Current time
1117 * flags - Flags for other relevant information
1118 * cpu_id - The CPU for which the timer expired
1119 * timeout_ticks - Per core timer timeout
1120 */
1121 typedef void (*sched_perfcontrol_running_timer_expire_t)(
1122 uint64_t now, uint32_t flags, uint32_t cpu_id, uint64_t *timeout_ticks);
1123
1124
1125 /*
1126 * Callers should always use the CURRENT version so that the kernel can detect both older
1127 * and newer structure layouts. New callbacks should always be added at the end of the
1128 * structure, and xnu should expect existing source recompiled against newer headers
 * to pass NULL for unimplemented callbacks. Pass NULL as the callbacks parameter
1130 * to reset callbacks to their default in-kernel values.
1131 */
1132
1133 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_0 (0) /* up-to oncore */
1134 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_1 (1) /* up-to max_runnable_latency */
1135 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_2 (2) /* up-to work_interval_notify */
1136 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_3 (3) /* up-to thread_group_deinit */
1137 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_4 (4) /* up-to deadline_passed */
1138 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_5 (5) /* up-to state_update */
1139 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_6 (6) /* up-to thread_group_flags_update */
1140 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_7 (7) /* up-to work_interval_ctl */
1141 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_8 (8) /* up-to thread_group_unblocked */
1142 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_9 (9) /* allows CLPC to specify resource contention flags */
1143 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_10 (10) /* allows CLPC to register a per core timer callback */
1144 #define SCHED_PERFCONTROL_CALLBACKS_VERSION_CURRENT SCHED_PERFCONTROL_CALLBACKS_VERSION_10
1145
struct sched_perfcontrol_callbacks {
	unsigned long version; /* Use SCHED_PERFCONTROL_CALLBACKS_VERSION_CURRENT */
	/* Present since VERSION_0 */
	sched_perfcontrol_offcore_t offcore;
	sched_perfcontrol_context_switch_t context_switch;
	sched_perfcontrol_oncore_t oncore;
	/* Added in VERSION_1 */
	sched_perfcontrol_max_runnable_latency_t max_runnable_latency;
	/* Added in VERSION_2 */
	sched_perfcontrol_work_interval_notify_t work_interval_notify;
	/* Added in VERSION_3 */
	sched_perfcontrol_thread_group_init_t thread_group_init;
	sched_perfcontrol_thread_group_deinit_t thread_group_deinit;
	/* Added in VERSION_4 */
	sched_perfcontrol_deadline_passed_t deadline_passed;
	/* Added in VERSION_5 */
	sched_perfcontrol_csw_t csw;
	sched_perfcontrol_state_update_t state_update;
	/* Added in VERSION_6 */
	sched_perfcontrol_thread_group_flags_update_t thread_group_flags_update;
	/* Added in VERSION_7 */
	sched_perfcontrol_work_interval_ctl_t work_interval_ctl;
	/* Added in VERSION_8 */
	sched_perfcontrol_thread_group_blocked_t thread_group_blocked;
	sched_perfcontrol_thread_group_unblocked_t thread_group_unblocked;
	/* Added in VERSION_10 */
	sched_perfcontrol_running_timer_expire_t running_timer_expire;
};
1164 typedef struct sched_perfcontrol_callbacks *sched_perfcontrol_callbacks_t;
1165
1166 extern void sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks, unsigned long size_of_state);
1167 extern void sched_perfcontrol_thread_group_recommend(void *data, cluster_type_t recommendation);
1168 extern void sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit);
1169 extern const char* sched_perfcontrol_thread_group_get_name(void *data);
1170
1171 /*
1172 * Edge Scheduler-CLPC Interface
1173 *
1174 * sched_perfcontrol_thread_group_preferred_clusters_set()
1175 *
1176 * The Edge scheduler expects thread group recommendations to be specific clusters rather
1177 * than just E/P. In order to allow more fine grained control, CLPC can specify an override
1178 * preferred cluster per QoS bucket. CLPC passes a common preferred cluster `tg_preferred_cluster`
1179 * and an array of size [PERFCONTROL_CLASS_MAX] with overrides for specific perfctl classes.
1180 * The scheduler translates these preferences into sched_bucket
1181 * preferences and applies the changes.
1182 *
1183 */
/* Token to indicate a particular perfctl class is not overridden */
1185 #define SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE ((uint32_t)~0)
1186
1187 /*
1188 * CLPC can also indicate if there should be an immediate rebalancing of threads of this TG as
1189 * part of this preferred cluster change. It does that by specifying the following options.
1190 */
1191 #define SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING 0x1
1192 #define SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNABLE 0x2
1193 typedef uint64_t sched_perfcontrol_preferred_cluster_options_t;
1194
1195 extern void sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
1196 uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options);
1197
1198 /*
1199 * Edge Scheduler-CLPC Interface
1200 *
1201 * sched_perfcontrol_edge_matrix_by_qos_get()/sched_perfcontrol_edge_matrix_by_qos_set()
1202 *
1203 * For each QoS, the Edge scheduler uses edges between clusters to define the likelihood of
1204 * migrating threads of that QoS across the clusters. The edge config between any two clusters
 * defines the edge weight and whether migration and steal operations are allowed across that
1206 * edge. The getter and setter allow CLPC to query and configure edge properties between various
1207 * clusters on the platform.
1208 *
1209 * The edge_matrix is a flattened array of dimension num_psets X num_psets X num_classes, where
1210 * num_classes equals PERFCONTROL_CLASS_MAX and the scheduler will map perfcontrol classes onto
1211 * QoS buckets. For perfcontrol classes lacking an equivalent QoS bucket, the "set" operation is
1212 * a no-op, and the "get" operation returns zeroed edges.
1213 */
1214
1215 extern void sched_perfcontrol_edge_matrix_by_qos_get(sched_clutch_edge *edge_matrix, bool *edge_requested, uint64_t flags, uint64_t num_psets, uint64_t num_classes);
1216 extern void sched_perfcontrol_edge_matrix_by_qos_set(sched_clutch_edge *edge_matrix, bool *edge_changed, uint64_t flags, uint64_t num_psets, uint64_t num_classes);
1217
1218 /*
1219 * sched_perfcontrol_edge_matrix_get()/sched_perfcontrol_edge_matrix_set()
1220 *
1221 * Legacy interface for getting/setting the edge config properties, which determine the edge
1222 * weight and whether steal and migration are allowed between any two clusters. Since the
1223 * edge matrix has a per-QoS dimension, sched_perfcontrol_edge_matrix_set() sets the
1224 * configuration to be the same across all QoSes. sched_perfcontrol_edge_matrix_get() reads
1225 * the edge matrix setting from the highest QoS (fixed priority).
1226 *
 * Superseded by sched_perfcontrol_edge_matrix_by_qos_get()/sched_perfcontrol_edge_matrix_by_qos_set()
1228 */
1229
1230 extern void sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_requested, uint64_t flags, uint64_t matrix_order);
1231 extern void sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changed, uint64_t flags, uint64_t matrix_order);
1232
1233 /*
1234 * sched_perfcontrol_edge_cpu_rotation_bitmasks_get()/sched_perfcontrol_edge_cpu_rotation_bitmasks_set()
1235 *
1236 * In order to drive intra-cluster core rotation CLPC supplies the edge scheduler with a pair of
1237 * per-cluster bitmasks. The preferred_bitmask is a bitmask of CPU cores where if a bit is set,
1238 * CLPC would prefer threads to be scheduled on that core if it is idle. The migration_bitmask
1239 * is a bitmask of CPU cores where if a bit is set, CLPC would prefer threads no longer continue
1240 * running on that core if there is any other non-avoided idle core in the cluster that is available.
1241 */
1242
1243 extern void sched_perfcontrol_edge_cpu_rotation_bitmasks_set(uint32_t cluster_id, uint64_t preferred_bitmask, uint64_t migration_bitmask);
1244 extern void sched_perfcontrol_edge_cpu_rotation_bitmasks_get(uint32_t cluster_id, uint64_t *preferred_bitmask, uint64_t *migration_bitmask);
1245
1246 /*
1247 * Update the deadline after which sched_perfcontrol_deadline_passed will be called.
1248 * Returns TRUE if it successfully canceled a previously set callback,
1249 * and FALSE if it did not (i.e. one wasn't set, or callback already fired / is in flight).
1250 * The callback is automatically canceled when it fires, and does not repeat unless rearmed.
1251 *
1252 * This 'timer' executes as the scheduler switches between threads, on a non-idle core
1253 *
1254 * There can be only one outstanding timer globally.
1255 */
1256 extern boolean_t sched_perfcontrol_update_callback_deadline(uint64_t deadline);
1257
1258 /*
1259 * SFI configuration.
1260 */
1261 extern kern_return_t sched_perfcontrol_sfi_set_window(uint64_t window_usecs);
1262 extern kern_return_t sched_perfcontrol_sfi_set_bg_offtime(uint64_t offtime_usecs);
1263 extern kern_return_t sched_perfcontrol_sfi_set_utility_offtime(uint64_t offtime_usecs);
1264
/*
 * Scheduler callout sites for which perfcontrol callout statistics are
 * aggregated; used as the `type` argument to perfcontrol_callout_stat_avg().
 */
typedef enum perfcontrol_callout_type {
	PERFCONTROL_CALLOUT_ON_CORE,
	PERFCONTROL_CALLOUT_OFF_CORE,
	PERFCONTROL_CALLOUT_CONTEXT,
	PERFCONTROL_CALLOUT_STATE_UPDATE,
	/* Add other callout types here */
	PERFCONTROL_CALLOUT_MAX
} perfcontrol_callout_type_t;
1273
/*
 * Statistic kinds reported per callout type; used as the `stat` argument to
 * perfcontrol_callout_stat_avg().
 */
typedef enum perfcontrol_callout_stat {
	PERFCONTROL_STAT_INSTRS,
	PERFCONTROL_STAT_CYCLES,
	/* Add other stat types here */
	PERFCONTROL_STAT_MAX
} perfcontrol_callout_stat_t;
1280
1281 uint64_t perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,
1282 perfcontrol_callout_stat_t stat);
1283
1284 #ifdef __arm64__
1285 /* The performance controller may use this interface to recommend
1286 * that CPUs in the designated cluster employ WFE rather than WFI
1287 * within the idle loop, falling back to WFI after the specified
1288 * timeout. The updates are expected to be serialized by the caller,
1289 * the implementation is not required to perform internal synchronization.
1290 */
1291 uint32_t ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, uint64_t wfe_hint_flags);
1292 #endif /* __arm64__ */
1293
1294 #if defined(HAS_APPLE_PAC)
/*
 * Helpers for separating an arm64 pointer's PAC bits from its address bits.
 * PTR_MASK covers the low (64 - T1SZ_BOOT) address bits; PAC_MASK covers the
 * remaining upper bits where the PAC is stored.
 *
 * Each expansion is fully parenthesized so the macros compose safely with
 * surrounding operators at use sites (the original UNSIGN_PTR expanded to a
 * bare ternary, which mis-associates in expressions like `UNSIGN_PTR(p) & m`).
 */
#define ONES(x) (BIT((x)) - 1)
#define PTR_MASK ONES(64 - T1SZ_BOOT)
#define PAC_MASK (~PTR_MASK)
/* Nonzero when bit 55 of the pointer is set (upper canonical region). */
#define SIGN(p) ((p) & BIT(55))
/*
 * Strip the PAC by canonicalizing the upper bits: all-ones when bit 55 is
 * set, all-zeroes otherwise.
 */
#define UNSIGN_PTR(p) \
	(SIGN(p) ? ((p) | PAC_MASK) : ((p) & ~PAC_MASK))
1301
1302 uint64_t ml_default_rop_pid(void);
1303 uint64_t ml_default_jop_pid(void);
1304 uint64_t ml_non_arm64e_user_jop_pid(void);
1305 void ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit);
1306 void ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit, boolean_t disable_user_jop);
1307 void ml_task_set_jop_pid_from_shared_region(task_t task, boolean_t disable_user_jop);
1308 uint8_t ml_task_get_disable_user_jop(task_t task);
1309 void ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop);
1310 void ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop);
1311 void ml_thread_set_jop_pid(thread_t thread, task_t task);
1312
1313 #if !__has_ptrcheck
1314
1315 /*
1316 * There are two implementations of _ml_auth_ptr_unchecked(). Non-FPAC CPUs
1317 * take a fast path that directly auths the pointer, relying on the CPU to
 * poison invalid pointers without trapping. FPAC CPUs take a slower path which
1319 * emulates a non-trapping auth using strip + sign + compare, and manually
1320 * poisons the output when necessary.
1321 *
1322 * The FPAC implementation is also safe for non-FPAC CPUs, but less efficient;
 * guest kernels need to use it because they do not know at compile time whether
1324 * the host CPU supports FPAC.
1325 */
1326
1327 void *
1328 ml_poison_ptr(void *ptr, ptrauth_key key);
1329
1330 #if __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM
1331 /*
1332 * ptrauth_sign_unauthenticated() reimplemented using asm volatile, forcing the
1333 * compiler to assume this operation has side-effects and cannot be reordered
1334 */
/* Evaluates __value and __data exactly once; result is the signed pointer. */
#define ptrauth_sign_volatile(__value, __suffix, __data)        \
	({                                                      \
		void *__ret = __value;                          \
		asm volatile (                                  \
			"pac" #__suffix " %[value], %[data]"    \
			: [value] "+r"(__ret)                   \
			: [data] "r"(__data)                    \
		);                                              \
		__ret;                                          \
	})
1345
/*
 * FPAC-safe non-trapping auth: strip the PAC, re-sign the stripped pointer
 * with the expected key/modifier, and compare against the input. A match
 * means the input carried a valid signature, so yield the stripped pointer;
 * otherwise yield a poisoned pointer instead of letting FPAC trap.
 */
#define ml_auth_ptr_unchecked_for_key(_ptr, _suffix, _key, _modifier)          \
	do {                                                                   \
		void *stripped = ptrauth_strip(_ptr, _key);                    \
		void *reauthed = ptrauth_sign_volatile(stripped, _suffix, _modifier); \
		if (__probable(_ptr == reauthed)) {                            \
			_ptr = stripped;                                       \
		} else {                                                       \
			_ptr = ml_poison_ptr(stripped, _key);                  \
		}                                                              \
	} while (0)

#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	ml_auth_ptr_unchecked_for_key(_ptr, _suffix, ptrauth_key_as ## _suffix, _modifier)
#else
/*
 * Non-FPAC fast path: a single auth instruction; the CPU poisons invalid
 * pointers without trapping (see the block comment above).
 */
#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
#endif /* __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM */
1363
1364 /**
1365 * Authenticates a signed pointer without trapping on failure.
1366 *
1367 * @warning This function must be called with interrupts disabled.
1368 *
1369 * @warning Pointer authentication failure should normally be treated as a fatal
1370 * error. This function is intended for a handful of callers that cannot panic
1371 * on failure, and that understand the risks in handling a poisoned return
1372 * value. Other code should generally use the trapping variant
1373 * ptrauth_auth_data() instead.
1374 *
1375 * @param ptr the pointer to authenticate
1376 * @param key which key to use for authentication
1377 * @param modifier a modifier to mix into the key
1378 * @return an authenticated version of ptr, possibly with poison bits set
1379 */
static inline OS_ALWAYS_INLINE void *
ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
{
	/*
	 * Dispatch on the low two bits of `key` to the per-key auth macro;
	 * the four cases here cover the asia/asib/asda/asdb keys. NOTE(review):
	 * assumes callers only pass one of these four keys — other key values
	 * that alias them in the low bits would be authed with the aliased key.
	 */
	switch (key & 0x3) {
	case ptrauth_key_asia:
		_ml_auth_ptr_unchecked(ptr, ia, modifier);
		break;
	case ptrauth_key_asib:
		_ml_auth_ptr_unchecked(ptr, ib, modifier);
		break;
	case ptrauth_key_asda:
		_ml_auth_ptr_unchecked(ptr, da, modifier);
		break;
	case ptrauth_key_asdb:
		_ml_auth_ptr_unchecked(ptr, db, modifier);
		break;
	}

	/* May carry poison bits if authentication failed (see header comment). */
	return ptr;
}
1400
1401 #endif /* !__has_ptrcheck */
1402
1403 uint64_t ml_enable_user_jop_key(uint64_t user_jop_key);
1404
1405 /**
1406 * Restores the previous JOP key state after a previous ml_enable_user_jop_key()
1407 * call.
1408 *
1409 * @param user_jop_key The userspace JOP key previously passed to
1410 * ml_enable_user_jop_key()
1411 * @param saved_jop_state The saved JOP state returned by
1412 * ml_enable_user_jop_key()
1413 */
1414 void ml_disable_user_jop_key(uint64_t user_jop_key, uint64_t saved_jop_state);
1415 #endif /* defined(HAS_APPLE_PAC) */
1416
1417 void ml_enable_monitor(void);
1418
1419 #endif /* KERNEL_PRIVATE */
1420
1421 boolean_t machine_timeout_suspended(void);
1422 void ml_get_power_state(boolean_t *, boolean_t *);
1423
1424 uint32_t get_arm_cpu_version(void);
1425 boolean_t user_cont_hwclock_allowed(void);
1426 uint8_t user_timebase_type(void);
1427 boolean_t ml_thread_is64bit(thread_t thread);
1428
1429 #ifdef __arm64__
1430 bool ml_feature_supported(uint64_t feature_bit);
1431 void ml_set_align_checking(void);
1432 extern void wfe_timeout_configure(void);
1433 extern void wfe_timeout_init(void);
1434 #endif /* __arm64__ */
1435
1436 void ml_timer_evaluate(void);
1437 boolean_t ml_timer_forced_evaluation(void);
1438 void ml_gpu_stat_update(uint64_t);
1439 uint64_t ml_gpu_stat(thread_t);
1440 #endif /* __APPLE_API_PRIVATE */
1441
1442
1443
1444 #if __arm64__ && defined(CONFIG_XNUPOST) && defined(XNU_KERNEL_PRIVATE)
1445 extern void ml_expect_fault_begin(expected_fault_handler_t, uintptr_t);
1446 extern void ml_expect_fault_pc_begin(expected_fault_handler_t, uintptr_t);
1447 extern void ml_expect_fault_end(void);
1448 #endif /* __arm64__ && defined(CONFIG_XNUPOST) && defined(XNU_KERNEL_PRIVATE) */
1449
1450 #if defined(HAS_OBJC_BP_HELPER) && defined(XNU_KERNEL_PRIVATE)
1451 kern_return_t objc_bp_assist_cfg(uint64_t adr, uint64_t ctl);
1452 #endif /* defined(HAS_OBJC_BP_HELPER) && defined(XNU_KERNEL_PRIVATE) */
1453
1454 extern uint32_t phy_read_panic;
1455 extern uint32_t phy_write_panic;
1456 #if DEVELOPMENT || DEBUG
1457 extern uint64_t simulate_stretched_io;
1458 #endif
1459
1460 void ml_hibernate_active_pre(void);
1461 void ml_hibernate_active_post(void);
1462
1463 void ml_report_minor_badness(uint32_t badness_id);
1464 #define ML_MINOR_BADNESS_CONSOLE_BUFFER_FULL 0
1465 #define ML_MINOR_BADNESS_MEMFAULT_REPORTING_NOT_ENABLED 1
1466 #define ML_MINOR_BADNESS_PIO_WRITTEN_FROM_USERSPACE 2
1467
1468 #ifdef XNU_KERNEL_PRIVATE
1469 /**
1470 * Depending on the system, by the time a backtracer starts inspecting an
1471 * interrupted CPU's register state, the value of the PC might have been
1472 * modified. In those cases, the original PC value is placed into a different
1473 * register. This function abstracts out those differences for a backtracer
1474 * wanting the PC of an interrupted CPU.
1475 *
1476 * @param state The ARM register state to parse.
1477 *
1478 * @return The original PC of the interrupted CPU.
1479 */
1480 uint64_t ml_get_backtrace_pc(struct arm_saved_state *state);
1481
1482 /**
1483 * Returns whether a secure hibernation flow is supported.
1484 *
1485 * @note Hibernation itself might still be supported even if this function
1486 * returns false. This function just denotes whether a hibernation process
1487 * which securely hashes and stores the hibernation image is supported.
1488 *
1489 * @return True if the kernel supports a secure hibernation process, false
1490 * otherwise.
1491 */
1492 bool ml_is_secure_hib_supported(void);
1493
1494 /**
1495 * Returns whether the task should use 1 GHz timebase.
1496 *
1497 * @return True if the task should use 1 GHz timebase, false
1498 * otherwise.
1499 */
1500 bool ml_task_uses_1ghz_timebase(const task_t task);
1501 #endif /* XNU_KERNEL_PRIVATE */
1502
1503
1504
1505 #if HAS_MTE && XNU_KERNEL_PRIVATE
1506 bool ml_thread_get_sec_override(thread_t thread);
1507 void ml_thread_set_sec_override(thread_t thread, bool sec_override);
1508 #endif /* HAS_MTE && XNU_KERNEL_PRIVATE */
1509
1510 __END_DECLS
1511
1512 #endif /* _ARM_MACHINE_ROUTINES_H_ */
1513