xref: /xnu-10063.121.3/osfmk/arm64/machine_routines.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <arm64/machine_machdep.h>
30 #include <arm64/proc_reg.h>
31 #include <arm/machine_cpu.h>
32 #include <arm/cpu_internal.h>
33 #include <arm/cpuid.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <kern/misc_protos.h>
46 #include <prng/random.h>
47 #include <kern/startup.h>
48 #include <kern/thread.h>
49 #include <kern/timer_queue.h>
50 #include <mach/machine.h>
51 #include <machine/atomic.h>
52 #include <machine/config.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_shared_region.h>
56 #include <vm/vm_map.h>
57 #include <sys/codesign.h>
58 #include <sys/kdebug.h>
59 #include <kern/coalition.h>
60 #include <pexpert/device_tree.h>
61 
62 #include <IOKit/IOPlatformExpert.h>
63 #if HIBERNATION
64 #include <IOKit/IOHibernatePrivate.h>
65 #endif /* HIBERNATION */
66 
67 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
68 #include <arm64/amcc_rorgn.h>
69 #endif
70 
71 
72 #if CONFIG_SPTM
73 #include <arm64/sptm/sptm.h>
74 #endif /* CONFIG_SPTM */
75 
76 #include <libkern/section_keywords.h>
77 
78 /**
79  * On supported hardware, debuggable builds make the HID bits read-only
80  * without locking them.  This lets people manually modify HID bits while
81  * debugging, since they can use a debugging tool to first reset the HID
82  * bits back to read/write.  However it will still catch xnu changes that
83  * accidentally write to HID bits after they've been made read-only.
84  */
85 SECURITY_READ_ONLY_LATE(bool) skip_spr_lockdown_glb = 0;
86 
87 /*
88  * On some SoCs, PIO lockdown is applied in assembly in early boot by
89  * secondary CPUs.
90  * Since the cluster_pio_ro_ctl value is dynamic, it is stored here by the
91  * primary CPU so that it doesn't have to be computed each time by the
92  * startup code.
93  */
94 SECURITY_READ_ONLY_LATE(uint64_t) cluster_pio_ro_ctl_mask_glb = 0;
95 
96 #if CONFIG_CPU_COUNTERS
97 #include <kern/kpc.h>
98 #endif /* CONFIG_CPU_COUNTERS */
99 
100 #define MPIDR_CPU_ID(mpidr_el1_val)             (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
101 #define MPIDR_CLUSTER_ID(mpidr_el1_val)         (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
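/*
 * Illustrative example (editor's addition, not in the original source): for an
 * MPIDR_EL1 value of 0x0000000000000102, AFF1 is 0x01 and AFF0 is 0x02, so
 * MPIDR_CLUSTER_ID() yields 1 (the cluster) and MPIDR_CPU_ID() yields 2
 * (the CPU within that cluster).
 */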
102 
103 #if HAS_CLUSTER
104 static uint8_t cluster_initialized = 0;
105 #endif
106 
107 MACHINE_TIMEOUT_DEV_WRITEABLE(LockTimeOut, "lock", 6e6 /* 0.25s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
108 machine_timeout_t LockTimeOutUsec; // computed in ml_init_lock_timeout
109 
110 MACHINE_TIMEOUT_DEV_WRITEABLE(TLockTimeOut, "ticket-lock", 3e6 /* 0.125s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
111 
112 MACHINE_TIMEOUT_DEV_WRITEABLE(MutexSpin, "mutex-spin", 240 /* 10us */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
113 
114 uint64_t low_MutexSpin;
115 int64_t high_MutexSpin;
116 
117 
118 
119 static uint64_t ml_wfe_hint_max_interval;
120 #define MAX_WFE_HINT_INTERVAL_US (500ULL)
121 
122 /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
123 TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
124 
125 extern vm_offset_t   segLOWEST;
126 extern vm_offset_t   segLOWESTTEXT;
127 extern vm_offset_t   segLASTB;
128 extern unsigned long segSizeLAST;
129 
130 /* ARM64 specific bounds; used to test for presence in the kernelcache. */
131 extern vm_offset_t   vm_kernelcache_base;
132 extern vm_offset_t   vm_kernelcache_top;
133 
134 /* Location of the physmap / physical aperture */
135 extern uint64_t physmap_base;
136 
137 #if defined(CONFIG_SPTM)
138 extern const arm_physrange_t *arm_vm_kernelcache_ranges;
139 extern int arm_vm_kernelcache_numranges;
140 #else /* defined(CONFIG_SPTM) */
141 extern vm_offset_t arm_vm_kernelcache_phys_start;
142 extern vm_offset_t arm_vm_kernelcache_phys_end;
143 #endif /* defined(CONFIG_SPTM) */
144 
145 #if defined(HAS_IPI)
146 unsigned int gFastIPI = 1;
147 #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
148 static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
149     kDeferredIPITimerDefault);
150 #endif /* defined(HAS_IPI) */
151 
152 thread_t Idle_context(void);
153 
154 SECURITY_READ_ONLY_LATE(bool) cpu_config_correct = true;
155 
156 SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
157 SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
158 SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
159 	.version = CPU_TOPOLOGY_VERSION,
160 	.cpus = topology_cpu_array,
161 	.clusters = topology_cluster_array,
162 };
163 
164 _Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES];
165 
166 /**
167  * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
168  * entries of an arbitrary data type.  This is intended for use by specialized consumers
169  * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
170  * as follows:
171  *	hypothetical_array[cluster_offsets[AFF1] + AFF0]
172  * Most consumers should instead use general-purpose facilities such as PERCPU or
173  * ml_get_cpu_number().
174  */
175 SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
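/*
 * Minimal sketch of the indexing pattern described above (illustrative only;
 * "hypothetical_array" and "my_entry_t" are assumed names, not xnu symbols):
 *
 *	uint64_t mpidr;
 *	MRS(mpidr, "MPIDR_EL1");
 *	my_entry_t *e = &hypothetical_array[cluster_offsets[MPIDR_CLUSTER_ID(mpidr)]
 *	    + MPIDR_CPU_ID(mpidr)];
 */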
176 
177 SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
178 
179 extern uint32_t lockdown_done;
180 
181 /**
182  * Represents regions of virtual address space that should be reserved
183  * (pre-mapped) in each user address space.
184  */
185 static const struct vm_reserved_region vm_reserved_regions[] = {
186 	{
187 		.vmrr_name = "GPU Carveout",
188 		.vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
189 		.vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
190 	},
191 	/*
192 	 * Reserve the virtual memory space representing the commpage nesting region
193 	 * to prevent user processes from allocating memory within it. The actual
194 	 * page table entries for the commpage are inserted by vm_commpage_enter().
195 	 * This vm_map_enter() just prevents userspace from allocating/deallocating
196 	 * anything within the entire commpage nested region.
197 	 */
198 	{
199 		.vmrr_name = "commpage nesting",
200 		.vmrr_addr = _COMM_PAGE64_NESTING_START,
201 		.vmrr_size = _COMM_PAGE64_NESTING_SIZE
202 	}
203 };
204 
205 uint32_t get_arm_cpu_version(void);
206 
207 #if defined(HAS_IPI)
208 static inline void
209 ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
210 {
211 #if HAS_CLUSTER
212 	uint64_t local_mpidr;
213 	/* NOTE: this logic expects that we are called in a non-preemptible
214 	 * context, or at least one in which the calling thread is bound
215 	 * to a single CPU.  Otherwise we may migrate between choosing which
216 	 * IPI mechanism to use and issuing the IPI. */
217 	MRS(local_mpidr, "MPIDR_EL1");
218 	if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
219 		uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
220 		MSR("S3_5_C15_C0_0", x);
221 	} else {
222 		#define IPI_RR_TARGET_CLUSTER_SHIFT 16
223 		uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
224 		MSR("S3_5_C15_C0_1", x);
225 	}
226 #else
227 	uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
228 	MSR("S3_5_C15_C0_1", x);
229 #endif
230 	/* The recommended local/global IPI sequence is:
231 	 *   DSB <sys> (This ensures visibility of e.g. older stores to the
232 	 *     pending CPU signals bit vector in DRAM prior to IPI reception,
233 	 *     and is present in cpu_signal_internal())
234 	 *   MSR S3_5_C15_C0_1, Xt
235 	 *   ISB
236 	 */
237 	__builtin_arm_isb(ISB_SY);
238 }
239 #endif
240 
241 #if !defined(HAS_IPI)
242 __dead2
243 #endif
244 void
245 ml_cpu_signal(unsigned int cpu_mpidr __unused)
246 {
247 #if defined(HAS_IPI)
248 	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
249 #else
250 	panic("Platform does not support ACC Fast IPI");
251 #endif
252 }
253 
254 #if !defined(HAS_IPI)
255 __dead2
256 #endif
257 void
258 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
259 {
260 #if defined(HAS_IPI)
261 	/* adjust IPI_CR timer countdown value for deferred IPI
262 	 * accepts input in nanosecs, converts it to absolutetime (REFCLK ticks),
263 	 * and clamps the result to a maximum of 0xFFFF REFCLK ticks (16-bit field)
264 	 *
265 	 * global register, should only require a single write to update all
266 	 * CPU cores: from Skye ACC user spec section 5.7.3.3
267 	 *
268 	 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
269 	 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
270 	 */
271 	uint64_t abstime;
272 
273 	nanoseconds_to_absolutetime(nanosecs, &abstime);
274 
275 	abstime = MIN(abstime, 0xFFFF);
276 
277 	/* update deferred_ipi_timer_ns with the new clamped value */
278 	absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
279 
280 	MSR("S3_5_C15_C3_1", abstime);
281 #else
282 	(void)nanosecs;
283 	panic("Platform does not support ACC Fast IPI");
284 #endif
285 }
286 
287 uint64_t
288 ml_cpu_signal_deferred_get_timer()
289 {
290 #if defined(HAS_IPI)
291 	return deferred_ipi_timer_ns;
292 #else
293 	return 0;
294 #endif
295 }
296 
297 #if !defined(HAS_IPI)
298 __dead2
299 #endif
300 void
301 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
302 {
303 #if defined(HAS_IPI)
304 	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
305 #else
306 	panic("Platform does not support ACC Fast IPI deferral");
307 #endif
308 }
309 
310 #if !defined(HAS_IPI)
311 __dead2
312 #endif
313 void
314 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
315 {
316 #if defined(HAS_IPI)
317 	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
318 #else
319 	panic("Platform does not support ACC Fast IPI retraction");
320 #endif
321 }
322 
323 extern uint32_t idle_proximate_io_wfe_unmasked;
324 
325 #define CPUPM_IDLE_WFE 0x5310300
326 static bool
327 wfe_process_recommendation(void)
328 {
329 	bool ipending = false;
330 	if (__probable(idle_proximate_io_wfe_unmasked == 1)) {
331 		/* Check for an active perf. controller generated
332 		 * WFE recommendation for this cluster.
333 		 */
334 		cpu_data_t *cdp = getCpuDatap();
335 		uint32_t cid = cdp->cpu_cluster_id;
336 		uint64_t wfe_ttd = 0;
337 		uint64_t wfe_deadline = 0;
338 
339 		if ((wfe_ttd = ml_cluster_wfe_timeout(cid)) != 0) {
340 			wfe_deadline = mach_absolute_time() + wfe_ttd;
341 		}
342 
343 		if (wfe_deadline != 0) {
344 			/* Poll issuing event-bounded WFEs until an interrupt
345 			 * arrives or the WFE recommendation expires
346 			 */
347 #if DEVELOPMENT || DEBUG
348 			uint64_t wc = cdp->wfe_count;
349 			KDBG(CPUPM_IDLE_WFE | DBG_FUNC_START, ipending, wc, wfe_ttd, cdp->cpu_stat.irq_ex_cnt_wake);
350 #endif
351 			/* Issue WFE until the recommendation expires,
352 			 * with IRQs unmasked.
353 			 */
354 			ipending = wfe_to_deadline_or_interrupt(cid, wfe_deadline, cdp, true, true);
355 #if DEVELOPMENT || DEBUG
356 			KDBG(CPUPM_IDLE_WFE | DBG_FUNC_END, ipending, cdp->wfe_count - wc, wfe_deadline, cdp->cpu_stat.irq_ex_cnt_wake);
357 #endif
358 		}
359 	}
360 	return ipending;
361 }
362 
363 void
364 machine_idle(void)
365 {
366 	/* Interrupts are expected to be masked on entry or re-entry via
367 	 * Idle_load_context()
368 	 */
369 	assert((__builtin_arm_rsr("DAIF") & (DAIF_IRQF | DAIF_FIQF)) == (DAIF_IRQF | DAIF_FIQF));
370 	/* Check for, and act on, a WFE recommendation.
371 	 * Bypasses context spill/fill for a minor perf. increment.
372 	 * May unmask and restore IRQ+FIQ mask.
373 	 */
374 	if (wfe_process_recommendation() == false) {
375 		/* If WFE recommendation absent, or WFE deadline
376 		 * arrived with no interrupt pending/processed,
377 		 * fall back to WFI.
378 		 */
379 		Idle_context();
380 	}
381 	__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
382 }
383 
384 void
385 OSSynchronizeIO(void)
386 {
387 	__builtin_arm_dsb(DSB_SY);
388 }
389 
390 uint64_t
391 get_aux_control(void)
392 {
393 	uint64_t        value;
394 
395 	MRS(value, "ACTLR_EL1");
396 	return value;
397 }
398 
399 uint64_t
400 get_mmu_control(void)
401 {
402 	uint64_t        value;
403 
404 	MRS(value, "SCTLR_EL1");
405 	return value;
406 }
407 
408 uint64_t
409 get_tcr(void)
410 {
411 	uint64_t        value;
412 
413 	MRS(value, "TCR_EL1");
414 	return value;
415 }
416 
417 boolean_t
418 ml_get_interrupts_enabled(void)
419 {
420 	uint64_t        value;
421 
422 	MRS(value, "DAIF");
423 	if (value & DAIF_IRQF) {
424 		return FALSE;
425 	}
426 	return TRUE;
427 }
428 
429 pmap_paddr_t
430 get_mmu_ttb(void)
431 {
432 	pmap_paddr_t    value;
433 
434 	MRS(value, "TTBR0_EL1");
435 	return value;
436 }
437 
438 uint32_t
439 get_arm_cpu_version(void)
440 {
441 	uint32_t value = machine_read_midr();
442 
443 	/* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
444 	return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
445 }
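/*
 * Worked example (editor's addition, illustrative only): a MIDR_EL1 value with
 * variant 0x2 and revision 0x1 composes to 0x21, i.e. the variant lands in
 * bits [7:4] and the revision in bits [3:0] of the returned value.
 */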
446 
447 bool
448 ml_feature_supported(uint32_t feature_bit)
449 {
450 	uint64_t aidr_el1_value = 0;
451 
452 	MRS(aidr_el1_value, "AIDR_EL1");
453 
454 #ifdef APPLEAVALANCHE
455 #endif // APPLEAVALANCHE
456 
457 	return aidr_el1_value & feature_bit;
458 }
459 
460 /*
461  * user_cont_hwclock_allowed()
462  *
463  * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
464  * as a continuous time source (e.g. from mach_continuous_time)
465  */
466 boolean_t
467 user_cont_hwclock_allowed(void)
468 {
469 #if HAS_CONTINUOUS_HWCLOCK
470 	return TRUE;
471 #else
472 	return FALSE;
473 #endif
474 }
475 
476 /*
477  * user_timebase_type()
478  *
479  * Indicates type of EL0 virtual timebase read (CNTVCT_EL0).
480  *
481  * USER_TIMEBASE_NONE: EL0 has no access to timebase register
482  * USER_TIMEBASE_SPEC: EL0 has access to speculative timebase reads (CNTVCT_EL0)
483  * USER_TIMEBASE_NOSPEC: EL0 has access to non speculative timebase reads (CNTVCTSS_EL0)
484  *
485  */
486 
487 uint8_t
488 user_timebase_type(void)
489 {
490 #if HAS_ACNTVCT
491 	return USER_TIMEBASE_NOSPEC_APPLE;
492 #elif __ARM_ARCH_8_6__
493 	return USER_TIMEBASE_NOSPEC;
494 #else
495 	return USER_TIMEBASE_SPEC;
496 #endif
497 }
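/*
 * Hedged EL0-side sketch (editor's addition; "tb_type" is a placeholder, not an
 * xnu symbol): a userspace timebase reader could select the register based on
 * the type reported here, e.g.
 *
 *	uint64_t tb;
 *	if (tb_type == USER_TIMEBASE_NOSPEC) {
 *		__asm__ volatile ("mrs %0, CNTVCTSS_EL0" : "=r"(tb));
 *	} else {
 *		__asm__ volatile ("isb; mrs %0, CNTVCT_EL0" : "=r"(tb));
 *	}
 */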
498 
499 void
500 machine_startup(__unused boot_args * args)
501 {
502 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
503 	if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
504 		gFastIPI = 1;
505 	}
506 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
507 
508 
509 	machine_conf();
510 
511 
512 	/*
513 	 * Kick off the kernel bootstrap.
514 	 */
515 	kernel_bootstrap();
516 	/* NOTREACHED */
517 }
518 
519 typedef void (*invalidate_fn_t)(void);
520 
521 static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;
522 
523 void set_invalidate_hmac_function(invalidate_fn_t fn);
524 
525 void
526 set_invalidate_hmac_function(invalidate_fn_t fn)
527 {
528 	if (NULL != invalidate_hmac_function) {
529 		panic("Invalidate HMAC function already set");
530 	}
531 
532 	invalidate_hmac_function = fn;
533 }
534 
535 void
536 machine_lockdown(void)
537 {
538 
539 #if CONFIG_SPTM
540 	/**
541 	 * On devices that make use of the SPTM, the SPTM is responsible for
542 	 * managing system register locks. Due to this, we skip the call to
543 	 * spr_lockdown() below.
544 	 */
545 #else
546 #endif
547 
548 	arm_vm_prot_finalize(PE_state.bootArgs);
549 
550 #if CONFIG_KERNEL_INTEGRITY
551 #if KERNEL_INTEGRITY_WT
552 	/* Watchtower
553 	 *
554 	 * Notify the monitor about the completion of early kernel bootstrap.
555 	 * From this point forward it will enforce the integrity of kernel text,
556 	 * rodata and page tables.
557 	 */
558 
559 #ifdef MONITOR
560 	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
561 #endif
562 #endif /* KERNEL_INTEGRITY_WT */
563 
564 #if CONFIG_SPTM
565 	extern void pmap_prepare_commpages(void);
566 	pmap_prepare_commpages();
567 
568 	/**
569 	 * sptm_lockdown_xnu() disables preemption like all SPTM calls, but may take
570 	 * a fair amount of time as it involves retyping a large number of pages.
571 	 * This preemption latency is not really a concern since we're still fairly
572 	 * early in the boot process, so just explicitly disable preemption before
573 	 * invoking the SPTM and abandon preemption latency measurements before
574 	 * re-enabling it.
575 	 */
576 	disable_preemption();
577 	/* Signal the SPTM that XNU is ready for RO memory to actually become read-only */
578 	sptm_lockdown_xnu();
579 #if SCHED_HYGIENE_DEBUG
580 	abandon_preemption_disable_measurement();
581 #endif /* SCHED_HYGIENE_DEBUG */
582 	enable_preemption();
583 #else
584 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
585 	/* KTRR
586 	 *
587 	 * Lock physical KTRR region. KTRR region is read-only. Memory outside
588 	 * the region is not executable at EL1.
589 	 */
590 
591 	rorgn_lockdown();
592 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
593 #endif /* CONFIG_SPTM */
594 
595 #if XNU_MONITOR
596 	pmap_lockdown_ppl();
597 #endif
598 
599 #endif /* CONFIG_KERNEL_INTEGRITY */
600 
601 
602 	if (NULL != invalidate_hmac_function) {
603 		invalidate_hmac_function();
604 	}
605 
606 	lockdown_done = 1;
607 }
608 
609 
610 char           *
611 machine_boot_info(
612 	__unused char *buf,
613 	__unused vm_size_t size)
614 {
615 	return PE_boot_args();
616 }
617 
618 void
619 slave_machine_init(__unused void *param)
620 {
621 	cpu_machine_init();     /* Initialize the processor */
622 	clock_init();           /* Init the clock */
623 }
624 
625 /*
626  *	Routine:        machine_processor_shutdown
627  *	Function:
628  */
629 thread_t
630 machine_processor_shutdown(
631 	__unused thread_t thread,
632 	void (*doshutdown)(processor_t),
633 	processor_t processor)
634 {
635 	return Shutdown_context(doshutdown, processor);
636 }
637 
638 /*
639  *      Routine:        ml_init_lock_timeout
640  *      Function:
641  */
642 static void __startup_func
643 ml_init_lock_timeout(void)
644 {
645 	/*
646 	 * This function is called after STARTUP_SUB_TIMEOUTS
647 	 * initialization, so using the "legacy" boot-args here overrides
648 	 * the ml-timeout-... configuration. (Given that these boot-args
649 	 * are usually explicitly specified, it makes sense for them to
650 	 * override ml-timeout-..., which may come from the device tree.)
651 	 */
652 
653 	uint64_t lto_timeout_ns;
654 	uint64_t lto_abstime;
655 	uint32_t slto;
656 
657 	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
658 		lto_timeout_ns = slto * NSEC_PER_USEC;
659 		nanoseconds_to_absolutetime(lto_timeout_ns, &lto_abstime);
660 		os_atomic_store(&LockTimeOut, lto_abstime, relaxed);
661 	} else {
662 		lto_abstime = os_atomic_load(&LockTimeOut, relaxed);
663 		absolutetime_to_nanoseconds(lto_abstime, &lto_timeout_ns);
664 	}
665 
666 	os_atomic_store(&LockTimeOutUsec, lto_timeout_ns / NSEC_PER_USEC, relaxed);
667 
668 	if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
669 		nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &lto_abstime);
670 		os_atomic_store(&TLockTimeOut, lto_abstime, relaxed);
671 	} else if (lto_abstime != 0) {
672 		os_atomic_store(&TLockTimeOut, lto_abstime >> 1, relaxed);
673 	} // else take default from MACHINE_TIMEOUT.
674 
675 	uint64_t mtxspin;
676 	uint64_t mtx_abstime;
677 	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
678 		if (mtxspin > USEC_PER_SEC >> 4) {
679 			mtxspin =  USEC_PER_SEC >> 4;
680 		}
681 		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &mtx_abstime);
682 		os_atomic_store(&MutexSpin, mtx_abstime, relaxed);
683 	} else {
684 		mtx_abstime = os_atomic_load(&MutexSpin, relaxed);
685 	}
686 
687 	low_MutexSpin = os_atomic_load(&MutexSpin, relaxed);
688 	/*
689 	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
690 	 * real_ncpus is not set at this time
691 	 *
692 	 * NOTE: active spinning is disabled in arm. It can be activated
693 	 * by setting high_MutexSpin through the sysctl.
694 	 */
695 	high_MutexSpin = low_MutexSpin;
696 
697 	uint64_t maxwfeus = MAX_WFE_HINT_INTERVAL_US;
698 	PE_parse_boot_argn("max_wfe_us", &maxwfeus, sizeof(maxwfeus));
699 	nanoseconds_to_absolutetime(maxwfeus * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
700 }
701 STARTUP(TIMEOUTS, STARTUP_RANK_MIDDLE, ml_init_lock_timeout);
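/*
 * Illustrative boot-arg usage (editor's addition; example values are
 * assumptions, not recommendations): booting with "slto_us=500000" sets
 * LockTimeOut to the timebase equivalent of 500 ms, overriding any
 * ml-timeout-... value from the device tree; "mtxspin=20" would similarly set
 * MutexSpin to roughly 20 us (values above USEC_PER_SEC >> 4 are clamped).
 */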
702 
703 
704 /*
705  * This is called when all of the ml_processor_info_t structures have been
706  * initialized and all the processors have been started through processor_start().
707  *
708  * Required by the scheduler subsystem.
709  */
710 void
711 ml_cpu_init_completed(void)
712 {
713 	if (SCHED(cpu_init_completed) != NULL) {
714 		SCHED(cpu_init_completed)();
715 	}
716 }
717 
718 /*
719  * These are called from the machine-independent routine cpu_up()
720  * to perform machine-dependent info updates.
721  *
722  * The update to CPU counts needs to be separate from other actions
723  * because we don't update the counts when CLPC causes temporary
724  * cluster powerdown events, as these must be transparent to the user.
725  */
726 void
727 ml_cpu_up(void)
728 {
729 }
730 
731 void
732 ml_cpu_up_update_counts(int cpu_id)
733 {
734 	ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
735 
736 	os_atomic_inc(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
737 
738 	os_atomic_inc(&machine_info.physical_cpu, relaxed);
739 	os_atomic_inc(&machine_info.logical_cpu, relaxed);
740 }
741 
742 /*
743  * These are called from the machine-independent routine cpu_down()
744  * to perform machine-dependent info updates.
745  *
746  * The update to CPU counts needs to be separate from other actions
747  * because we don't update the counts when CLPC causes temporary
748  * cluster powerdown events, as these must be transparent to the user.
749  */
750 void
751 ml_cpu_down(void)
752 {
753 	/*
754 	 * If we want to deal with outstanding IPIs, we need to
755 	 * do so relatively early in the processor_doshutdown path,
756 	 * as we pend decrementer interrupts using the IPI
757 	 * mechanism if we cannot immediately service them (if
758 	 * IRQ is masked).  Do so now.
759 	 *
760 	 * We aren't on the interrupt stack here; would it make
761 	 * more sense to disable signaling and then enable
762 	 * interrupts?  It might be a bit cleaner.
763 	 */
764 	cpu_data_t *cpu_data_ptr = getCpuDatap();
765 	cpu_data_ptr->cpu_running = FALSE;
766 
767 	if (cpu_data_ptr != &BootCpuData) {
768 		/*
769 		 * Move all of this cpu's timers to the master/boot cpu,
770 		 * and poke it in case there's a sooner deadline for it to schedule.
771 		 */
772 		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
773 		kern_return_t rv = cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, &ml_cpu_down);
774 		if (rv != KERN_SUCCESS) {
775 			panic("ml_cpu_down: IPI failure %d", rv);
776 		}
777 	}
778 
779 	cpu_signal_handler_internal(TRUE);
780 }
781 void
782 ml_cpu_down_update_counts(int cpu_id)
783 {
784 	ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
785 
786 	os_atomic_dec(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
787 
788 	os_atomic_dec(&machine_info.physical_cpu, relaxed);
789 	os_atomic_dec(&machine_info.logical_cpu, relaxed);
790 }
791 
792 
793 unsigned int
794 ml_get_machine_mem(void)
795 {
796 	return machine_info.memory_size;
797 }
798 
799 __attribute__((noreturn))
800 void
801 halt_all_cpus(boolean_t reboot)
802 {
803 	if (reboot) {
804 		printf("MACH Reboot\n");
805 		PEHaltRestart(kPERestartCPU);
806 	} else {
807 		printf("CPU halted\n");
808 		PEHaltRestart(kPEHaltCPU);
809 	}
810 	while (1) {
811 		;
812 	}
813 }
814 
815 __attribute__((noreturn))
816 void
817 halt_cpu(void)
818 {
819 	halt_all_cpus(FALSE);
820 }
821 
822 /*
823  *	Routine:        machine_signal_idle
824  *	Function:
825  */
826 void
827 machine_signal_idle(
828 	processor_t processor)
829 {
830 	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
831 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
832 }
833 
834 void
835 machine_signal_idle_deferred(
836 	processor_t processor)
837 {
838 	cpu_signal_deferred(processor_to_cpu_datap(processor));
839 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
840 }
841 
842 void
843 machine_signal_idle_cancel(
844 	processor_t processor)
845 {
846 	cpu_signal_cancel(processor_to_cpu_datap(processor));
847 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
848 }
849 
850 /*
851  *	Routine:        ml_install_interrupt_handler
852  *	Function:	Initialize Interrupt Handler
853  */
854 void
855 ml_install_interrupt_handler(
856 	void *nub,
857 	int source,
858 	void *target,
859 	IOInterruptHandler handler,
860 	void *refCon)
861 {
862 	cpu_data_t     *cpu_data_ptr;
863 	boolean_t       current_state;
864 
865 	current_state = ml_set_interrupts_enabled(FALSE);
866 	cpu_data_ptr = getCpuDatap();
867 
868 	cpu_data_ptr->interrupt_nub = nub;
869 	cpu_data_ptr->interrupt_source = source;
870 	cpu_data_ptr->interrupt_target = target;
871 	cpu_data_ptr->interrupt_handler = handler;
872 	cpu_data_ptr->interrupt_refCon = refCon;
873 
874 	(void) ml_set_interrupts_enabled(current_state);
875 }
876 
877 /*
878  *	Routine:        ml_init_interrupt
879  *	Function:	Initialize Interrupts
880  */
881 void
882 ml_init_interrupt(void)
883 {
884 #if defined(HAS_IPI)
885 	/*
886 	 * ml_init_interrupt will get called once for each CPU, but this is redundant
887 	 * because there is only one global copy of the register for Skye. Do it only
888 	 * on the bootstrap CPU.
889 	 */
890 	if (getCpuDatap()->cluster_master) {
891 		ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
892 	}
893 #endif
894 }
895 
896 /*
897  *	Routine:        ml_init_timebase
898  *	Function:	register and set up Timebase, Decrementer services
899  */
900 void
901 ml_init_timebase(
902 	void            *args,
903 	tbd_ops_t       tbd_funcs,
904 	vm_offset_t     int_address,
905 	vm_offset_t     int_value __unused)
906 {
907 	cpu_data_t     *cpu_data_ptr;
908 
909 	cpu_data_ptr = (cpu_data_t *)args;
910 
911 	if ((cpu_data_ptr == &BootCpuData)
912 	    && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
913 		rtclock_timebase_func = *tbd_funcs;
914 		rtclock_timebase_addr = int_address;
915 	}
916 }
917 
918 #define ML_READPROP_MANDATORY UINT64_MAX
919 
920 static uint64_t
921 ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
922 {
923 	void const *prop;
924 	unsigned int propSize;
925 
926 	if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
927 		if (propSize == sizeof(uint8_t)) {
928 			return *((uint8_t const *)prop);
929 		} else if (propSize == sizeof(uint16_t)) {
930 			return *((uint16_t const *)prop);
931 		} else if (propSize == sizeof(uint32_t)) {
932 			return *((uint32_t const *)prop);
933 		} else if (propSize == sizeof(uint64_t)) {
934 			return *((uint64_t const *)prop);
935 		} else {
936 			panic("CPU property '%s' has bad size %u", propertyName, propSize);
937 		}
938 	} else {
939 		if (default_value == ML_READPROP_MANDATORY) {
940 			panic("Missing mandatory property '%s'", propertyName);
941 		}
942 		return default_value;
943 	}
944 }
945 
946 static boolean_t
947 ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
948 {
949 	uint64_t const *prop;
950 	unsigned int propSize;
951 
952 	if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
953 		return FALSE;
954 	}
955 
956 	if (propSize != sizeof(uint64_t) * 2) {
957 		panic("Wrong property size for %s", propertyName);
958 	}
959 
960 	*pa_ptr = prop[0];
961 	*len_ptr = prop[1];
962 	return TRUE;
963 }
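/*
 * Editor's note (assumption grounded in the code above): a reg-range property
 * such as "cpu-impl-reg" is expected to be a pair of 64-bit values,
 * { physical base, length }, which this helper copies into *pa_ptr and
 * *len_ptr respectively; any other size is treated as a fatal misconfiguration.
 */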
964 
965 static boolean_t
966 ml_is_boot_cpu(const DTEntry entry)
967 {
968 	void const *prop;
969 	unsigned int propSize;
970 
971 	if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
972 		panic("unable to retrieve state for cpu");
973 	}
974 
975 	if (strncmp((char const *)prop, "running", propSize) == 0) {
976 		return TRUE;
977 	} else {
978 		return FALSE;
979 	}
980 }
981 
982 static void
983 ml_read_chip_revision(unsigned int *rev __unused)
984 {
985 	// The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
986 #ifdef APPLE_ARM64_ARCH_FAMILY
987 	DTEntry         entryP;
988 
989 	if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
990 		*rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
991 	} else {
992 		*rev = CPU_VERSION_UNKNOWN;
993 	}
994 #endif
995 }
996 
997 void
998 ml_parse_cpu_topology(void)
999 {
1000 	DTEntry entry, child __unused;
1001 	OpaqueDTEntryIterator iter;
1002 	uint32_t cpu_boot_arg = MAX_CPUS;
1003 	uint64_t cpumask_boot_arg = ULLONG_MAX;
1004 	int err;
1005 
1006 	int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
1007 	int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
1008 	const boolean_t cpus_boot_arg_present = PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
1009 	const boolean_t cpumask_boot_arg_present = PE_parse_boot_argn("cpumask", &cpumask_boot_arg, sizeof(cpumask_boot_arg));
1010 
1011 	// The cpus=N and cpumask=N boot args cannot be used simultaneously. Flag this
1012 	// so that we trigger a panic later in the boot process, once serial is enabled.
1013 	if (cpus_boot_arg_present && cpumask_boot_arg_present) {
1014 		cpu_config_correct = false;
1015 	}
1016 
1017 	err = SecureDTLookupEntry(NULL, "/cpus", &entry);
1018 	assert(err == kSuccess);
1019 
1020 	err = SecureDTInitEntryIterator(entry, &iter);
1021 	assert(err == kSuccess);
1022 
1023 	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
1024 		cluster_offsets[i] = -1;
1025 		cluster_phys_to_logical[i] = -1;
1026 		cluster_max_cpu_phys_id[i] = 0;
1027 	}
1028 
1029 	while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
1030 		boolean_t is_boot_cpu = ml_is_boot_cpu(child);
1031 		boolean_t cpu_enabled = cpumask_boot_arg & 1;
1032 		cpumask_boot_arg >>= 1;
1033 
1034 		// Boot CPU disabled in cpumask. Flag this so that we trigger a panic
1035 		// later in the boot process, once serial is enabled.
1036 		if (is_boot_cpu && !cpu_enabled) {
1037 			cpu_config_correct = false;
1038 		}
1039 
1040 		// Ignore this CPU if it has been disabled by the cpumask= boot-arg.
1041 		if (!is_boot_cpu && !cpu_enabled) {
1042 			continue;
1043 		}
1044 
1045 		// If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
1046 		// been added to the topology struct yet, and we only have one slot left, then skip
1047 		// every other non-boot CPU in order to leave room for the boot CPU.
1048 		//
1049 		// e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
1050 		// array will list CPU0, CPU1, and CPU4.  CPU2-CPU3 and CPU5-CPUn will be omitted.
1051 		if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
1052 			continue;
1053 		}
1054 		if (topology_info.num_cpus >= cpu_boot_arg) {
1055 			break;
1056 		}
1057 
1058 		ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
1059 
1060 		cpu->cpu_id = topology_info.num_cpus++;
1061 		assert(cpu->cpu_id < MAX_CPUS);
1062 		topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
1063 
1064 		cpu->die_id = 0;
1065 		topology_info.max_die_id = 0;
1066 
1067 		cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
1068 
1069 		cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
1070 		cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
1071 		cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
1072 		cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
1073 		cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
1074 
1075 		ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
1076 		ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
1077 		ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
1078 		cpu->cluster_type = CLUSTER_TYPE_SMP;
1079 
1080 		int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
1081 		if (cluster_type == 'E') {
1082 			cpu->cluster_type = CLUSTER_TYPE_E;
1083 		} else if (cluster_type == 'P') {
1084 			cpu->cluster_type = CLUSTER_TYPE_P;
1085 		}
1086 
1087 		topology_info.cluster_type_num_cpus[cpu->cluster_type]++;
1088 
1089 		/*
1090 		 * Since we want to keep a linear cluster ID space, we cannot just rely
1091 		 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
1092 		 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
1093 		 */
1094 #if HAS_CLUSTER
1095 		uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
1096 #else
1097 		uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
1098 #endif
1099 		assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
1100 		cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
1101 		    topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
1102 
1103 		assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
1104 
1105 		ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
1106 		if (cluster->num_cpus == 0) {
1107 			assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
1108 
1109 			topology_info.num_clusters++;
1110 			topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
1111 			topology_info.cluster_types |= (1 << cpu->cluster_type);
1112 
1113 			cluster->cluster_id = cpu->cluster_id;
1114 			cluster->cluster_type = cpu->cluster_type;
1115 			cluster->first_cpu_id = cpu->cpu_id;
1116 			assert(cluster_phys_to_logical[phys_cluster_id] == -1);
1117 			cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
1118 
1119 			topology_info.cluster_type_num_clusters[cluster->cluster_type]++;
1120 
1121 			// Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
1122 			// If we wind up with a bunch of these, we might want to create separate per-cluster
1123 			// EDT nodes and have the CPU nodes reference them through a phandle.
1124 			ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
1125 			ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
1126 		}
1127 
1128 #if HAS_CLUSTER
1129 		if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
1130 			cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
1131 		}
1132 #endif
1133 
1134 		cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
1135 		cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
1136 
1137 		cluster->num_cpus++;
1138 		cluster->cpu_mask |= 1ULL << cpu->cpu_id;
1139 
1140 		if (is_boot_cpu) {
1141 			assert(topology_info.boot_cpu == NULL);
1142 			topology_info.boot_cpu = cpu;
1143 			topology_info.boot_cluster = cluster;
1144 		}
1145 
1146 #if CONFIG_SPTM
1147 		sptm_register_cpu(cpu->phys_id);
1148 #endif
1149 	}
1150 
1151 #if HAS_CLUSTER
1152 	/*
1153 	 * Build the cluster offset array, ensuring that the region reserved
1154 	 * for each physical cluster contains enough entries to be indexed
1155 	 * by the maximum physical CPU ID (AFF0) within the cluster.
1156 	 */
1157 	unsigned int cur_cluster_offset = 0;
1158 	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
1159 		if (cluster_phys_to_logical[i] != -1) {
1160 			cluster_offsets[i] = cur_cluster_offset;
1161 			cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
1162 		}
1163 	}
1164 	assert(cur_cluster_offset <= MAX_CPUS);
1165 #else
1166 	/*
1167 	 * For H10, there are really 2 physical clusters, but they are not separated
1168 	 * into distinct ACCs.  AFF1 therefore always reports 0, and AFF0 numbering
1169 	 * is linear across both clusters.   For the purpose of MPIDR_EL1-based indexing,
1170 	 * treat H10 and earlier devices as though they contain a single cluster.
1171 	 */
1172 	cluster_offsets[0] = 0;
1173 #endif
1174 	assert(topology_info.boot_cpu != NULL);
1175 	ml_read_chip_revision(&topology_info.chip_revision);
1176 
1177 	/*
1178 	 * Set TPIDR_EL0 to indicate the correct cpu number & cluster id,
1179 	 * as we may not be booting from cpu 0. Userspace will consume
1180 	 * the current CPU number through this register. For non-boot
1181 	 * cores, this is done in start.s (start_cpu) using the per-cpu
1182 	 * data object.
1183 	 */
1184 	ml_topology_cpu_t *boot_cpu = topology_info.boot_cpu;
1185 	uint64_t tpidr_el0 = ((boot_cpu->cpu_id << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
1186 	    ((boot_cpu->cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
1187 	assert(((tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == boot_cpu->cpu_id);
1188 	assert(((tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == boot_cpu->cluster_id);
1189 	__builtin_arm_wsr64("TPIDR_EL0", tpidr_el0);
1190 
1191 	__builtin_arm_wsr64("TPIDRRO_EL0", 0);
1192 }
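/*
 * Minimal sketch (editor's addition, not xnu code) of how a consumer could
 * decode the value written to TPIDR_EL0 above, using the same
 * MACHDEP_TPIDR_* masks as the encode step:
 *
 *	uint64_t tpidr = __builtin_arm_rsr64("TPIDR_EL0");
 *	unsigned cpu_num    = (tpidr & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT;
 *	unsigned cluster_id = (tpidr & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT;
 */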
1193 
1194 const ml_topology_info_t *
1195 ml_get_topology_info(void)
1196 {
1197 	return &topology_info;
1198 }
1199 
1200 void
1201 ml_map_cpu_pio(void)
1202 {
1203 	unsigned int i;
1204 
1205 	for (i = 0; i < topology_info.num_cpus; i++) {
1206 		ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1207 		if (cpu->cpu_IMPL_pa) {
1208 			cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1209 			cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1210 		}
1211 		if (cpu->cpu_UTTDBG_pa) {
1212 			cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1213 		}
1214 	}
1215 
1216 	for (i = 0; i < topology_info.num_clusters; i++) {
1217 		ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1218 		if (cluster->acc_IMPL_pa) {
1219 			cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1220 		}
1221 		if (cluster->cpm_IMPL_pa) {
1222 			cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1223 		}
1224 	}
1225 }
1226 
1227 unsigned int
1228 ml_get_cpu_count(void)
1229 {
1230 	return topology_info.num_cpus;
1231 }
1232 
1233 unsigned int
1234 ml_get_cluster_count(void)
1235 {
1236 	return topology_info.num_clusters;
1237 }
1238 
1239 int
1240 ml_get_boot_cpu_number(void)
1241 {
1242 	return topology_info.boot_cpu->cpu_id;
1243 }
1244 
1245 cluster_type_t
1246 ml_get_boot_cluster_type(void)
1247 {
1248 	return topology_info.boot_cluster->cluster_type;
1249 }
1250 
1251 int
1252 ml_get_cpu_number(uint32_t phys_id)
1253 {
1254 	phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1255 
1256 	for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1257 		if (topology_info.cpus[i].phys_id == phys_id) {
1258 			return i;
1259 		}
1260 	}
1261 
1262 	return -1;
1263 }
1264 
1265 int
1266 ml_get_cluster_number(uint32_t phys_id)
1267 {
1268 	int cpu_id = ml_get_cpu_number(phys_id);
1269 	if (cpu_id < 0) {
1270 		return -1;
1271 	}
1272 
1273 	ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1274 
1275 	return cpu->cluster_id;
1276 }
1277 
1278 unsigned int
1279 ml_get_cpu_number_local(void)
1280 {
1281 	uint64_t mpidr_el1_value = 0;
1282 	unsigned cpu_id;
1283 
1284 	/* We identify the CPU based on the constant bits of MPIDR_EL1. */
1285 	MRS(mpidr_el1_value, "MPIDR_EL1");
1286 	cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);
1287 
1288 	assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());
1289 
1290 	return cpu_id;
1291 }
1292 
1293 int
1294 ml_get_cluster_number_local()
1295 {
1296 	uint64_t mpidr_el1_value = 0;
1297 	unsigned cluster_id;
1298 
1299 	/* We identify the cluster based on the constant bits of MPIDR_EL1. */
1300 	MRS(mpidr_el1_value, "MPIDR_EL1");
1301 	cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);
1302 
1303 	assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());
1304 
1305 	return cluster_id;
1306 }
1307 
1308 int
1309 ml_get_max_cpu_number(void)
1310 {
1311 	return topology_info.max_cpu_id;
1312 }
1313 
1314 int
1315 ml_get_max_cluster_number(void)
1316 {
1317 	return topology_info.max_cluster_id;
1318 }
1319 
1320 unsigned int
1321 ml_get_first_cpu_id(unsigned int cluster_id)
1322 {
1323 	return topology_info.clusters[cluster_id].first_cpu_id;
1324 }
1325 
1326 static_assert(MAX_CPUS <= 256, "MAX_CPUS must fit in _COMM_PAGE_CPU_TO_CLUSTER; Increase table size if needed");
1327 
1328 void
1329 ml_map_cpus_to_clusters(uint8_t *table)
1330 {
1331 	for (uint16_t cpu_id = 0; cpu_id < topology_info.num_cpus; cpu_id++) {
1332 		*(table + cpu_id) = (uint8_t)(topology_info.cpus[cpu_id].cluster_id);
1333 	}
1334 }
1335 
1336 /*
1337  * Return the die id of a cluster.
1338  */
1339 unsigned int
1340 ml_get_die_id(unsigned int cluster_id)
1341 {
1342 	/*
1343 	 * The current implementation gets the die_id from the
1344 	 * first CPU of the cluster.
1345 	 * rdar://80917654 (Add the die_id field to the cluster topology info)
1346 	 */
1347 	unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1348 	return topology_info.cpus[first_cpu].die_id;
1349 }
1350 
1351 /*
1352  * Return the index of a cluster in its die.
1353  */
1354 unsigned int
1355 ml_get_die_cluster_id(unsigned int cluster_id)
1356 {
1357 	 * The current implementation gets the die_cluster_id from the
1358 	 * The current implementation gets the die_id from the
1359 	 * first CPU of the cluster.
1360 	 * rdar://80917654 (Add the die_id field to the cluster topology info)
1361 	 */
1362 	unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1363 	return topology_info.cpus[first_cpu].die_cluster_id;
1364 }
1365 
1366 /*
1367  * Return the highest die id of the system.
1368  */
1369 unsigned int
1370 ml_get_max_die_id(void)
1371 {
1372 	return topology_info.max_die_id;
1373 }
1374 
1375 void
1376 ml_lockdown_init()
1377 {
1378 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1379 	rorgn_stash_range();
1380 #endif
1381 }
1382 
1383 kern_return_t
1384 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1385 {
1386 	if (!f) {
1387 		return KERN_FAILURE;
1388 	}
1389 
1390 	assert(lockdown_done);
1391 	f(this); // XXX: f this whole function
1392 
1393 	return KERN_SUCCESS;
1394 }
1395 
1396 static mcache_flush_function mcache_flush_func;
1397 static void* mcache_flush_service;
1398 kern_return_t
1399 ml_mcache_flush_callback_register(mcache_flush_function func, void *service)
1400 {
1401 	mcache_flush_service = service;
1402 	mcache_flush_func = func;
1403 
1404 	return KERN_SUCCESS;
1405 }
1406 
1407 kern_return_t
1408 ml_mcache_flush(void)
1409 {
1410 	if (!mcache_flush_func) {
1411 		panic("Cannot flush M$ with no flush callback registered");
1412 
1413 		return KERN_FAILURE;
1414 	} else {
1415 		return mcache_flush_func(mcache_flush_service);
1416 	}
1417 }
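/*
 * Illustrative usage (editor's addition; "my_flush_fn" and "my_service" are
 * placeholder names): a platform component registers its flush routine once,
 *
 *	ml_mcache_flush_callback_register(my_flush_fn, my_service);
 *
 * after which ml_mcache_flush() forwards to my_flush_fn(my_service); calling
 * ml_mcache_flush() with no callback registered panics.
 */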
1418 
1419 
1420 extern lck_mtx_t pset_create_lock;
1421 
1422 kern_return_t
1423 ml_processor_register(ml_processor_info_t *in_processor_info,
1424     processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1425     perfmon_interrupt_handler_func *pmi_handler_out)
1426 {
1427 	cpu_data_t *this_cpu_datap;
1428 	processor_set_t pset;
1429 	boolean_t  is_boot_cpu;
1430 	static unsigned int reg_cpu_count = 0;
1431 
1432 	if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1433 		return KERN_FAILURE;
1434 	}
1435 
1436 	if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
1437 		return KERN_FAILURE;
1438 	}
1439 
1440 	if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1441 		is_boot_cpu = FALSE;
1442 		this_cpu_datap = cpu_data_alloc(FALSE);
1443 		cpu_data_init(this_cpu_datap);
1444 	} else {
1445 		this_cpu_datap = &BootCpuData;
1446 		is_boot_cpu = TRUE;
1447 	}
1448 
1449 	assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
1450 
1451 	this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1452 
1453 	if (!is_boot_cpu) {
1454 		this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
1455 
1456 		if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1457 			goto processor_register_error;
1458 		}
1459 		assert((this_cpu_datap->cpu_number & MACHDEP_TPIDR_CPUNUM_MASK) == this_cpu_datap->cpu_number);
1460 	}
1461 
1462 	this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
1463 	this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
1464 	nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1465 	this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1466 
1467 	this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
1468 	this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1469 
1470 	this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
1471 	this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1472 	this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1473 	this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1474 
1475 	this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1476 	this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1477 	this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1478 	this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1479 	this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1480 	this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1481 
1482 	/*
1483 	 * Encode cpu_id, cluster_id to be stored in TPIDR_EL0 (see
1484 	 * cswitch.s:set_thread_registers, start.s:start_cpu) for consumption
1485 	 * by userspace.
1486 	 */
1487 	this_cpu_datap->cpu_tpidr_el0 = ((this_cpu_datap->cpu_number << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
1488 	    ((this_cpu_datap->cpu_cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
1489 	assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == this_cpu_datap->cpu_number);
1490 	assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == this_cpu_datap->cpu_cluster_id);
1491 
1492 #if HAS_CLUSTER
1493 	this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1494 #else /* HAS_CLUSTER */
1495 	this_cpu_datap->cluster_master = is_boot_cpu;
1496 #endif /* HAS_CLUSTER */
1497 	lck_mtx_lock(&pset_create_lock);
1498 	pset = pset_find(in_processor_info->cluster_id, NULL);
1499 	kprintf("[%d]%s>pset_find(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cluster_id, pset ? pset->pset_id : -1);
1500 	if (pset == NULL) {
1501 #if __AMP__
1502 		pset_cluster_type_t pset_cluster_type = this_cpu_datap->cpu_cluster_type == CLUSTER_TYPE_E ? PSET_AMP_E : PSET_AMP_P;
1503 		pset = pset_create(ml_get_boot_cluster_type() == this_cpu_datap->cpu_cluster_type ? &pset_node0 : &pset_node1, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id);
1504 		assert(pset != PROCESSOR_SET_NULL);
1505 		kprintf("[%d]%s>pset_create(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, this_cpu_datap->cpu_cluster_id, pset->pset_id);
1506 #else /* __AMP__ */
1507 		pset_cluster_type_t pset_cluster_type = PSET_SMP;
1508 		pset = pset_create(&pset_node0, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id);
1509 		assert(pset != PROCESSOR_SET_NULL);
1510 #endif /* __AMP__ */
1511 	}
1512 	kprintf("[%d]%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1513 	lck_mtx_unlock(&pset_create_lock);
1514 
1515 	processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
1516 	if (!is_boot_cpu) {
1517 		processor_init(processor, this_cpu_datap->cpu_number, pset);
1518 
1519 		if (this_cpu_datap->cpu_l2_access_penalty) {
1520 			/*
1521 			 * Cores that have a non-zero L2 access penalty compared
1522 			 * to the boot processor should be de-prioritized by the
1523 			 * scheduler, so that threads use the cores with better L2
1524 			 * preferentially.
1525 			 */
1526 			processor_set_primary(processor, master_processor);
1527 		}
1528 	}
1529 
1530 	*processor_out = processor;
1531 	*ipi_handler_out = cpu_signal_handler;
1532 #if CPMU_AIC_PMI && CONFIG_CPU_COUNTERS
1533 	*pmi_handler_out = mt_cpmu_aic_pmi;
1534 #else
1535 	*pmi_handler_out = NULL;
1536 #endif /* CPMU_AIC_PMI && CONFIG_CPU_COUNTERS */
1537 	if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1538 		*in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1539 	}
1540 
1541 #if CONFIG_CPU_COUNTERS
1542 	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1543 		goto processor_register_error;
1544 	}
1545 #endif /* CONFIG_CPU_COUNTERS */
1546 
1547 
1548 	if (!is_boot_cpu) {
1549 		random_cpu_init(this_cpu_datap->cpu_number);
1550 		// now let next CPU register itself
1551 		OSIncrementAtomic((SInt32*)&real_ncpus);
1552 	}
1553 
1554 	return KERN_SUCCESS;
1555 
1556 processor_register_error:
1557 #if CONFIG_CPU_COUNTERS
1558 	kpc_unregister_cpu(this_cpu_datap);
1559 #endif /* CONFIG_CPU_COUNTERS */
1560 	if (!is_boot_cpu) {
1561 		cpu_data_free(this_cpu_datap);
1562 	}
1563 
1564 	return KERN_FAILURE;
1565 }
1566 
1567 void
1568 ml_init_arm_debug_interface(
1569 	void * in_cpu_datap,
1570 	vm_offset_t virt_address)
1571 {
1572 	((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1573 	do_debugid();
1574 }
1575 
1576 /*
1577  *	Routine:        init_ast_check
1578  *	Function:
1579  */
1580 void
1581 init_ast_check(
1582 	__unused processor_t processor)
1583 {
1584 }
1585 
1586 /*
1587  *	Routine:        cause_ast_check
1588  *	Function:
1589  */
1590 void
1591 cause_ast_check(
1592 	processor_t processor)
1593 {
1594 	if (current_processor() != processor) {
1595 		cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1596 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1597 	}
1598 }
1599 
1600 extern uint32_t cpu_idle_count;
1601 
1602 void
1603 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1604 {
1605 	*icp = ml_at_interrupt_context();
1606 	*pidlep = (cpu_idle_count == real_ncpus);
1607 }
1608 
1609 /*
1610  *	Routine:        ml_cause_interrupt
1611  *	Function:	Generate a fake interrupt
1612  */
1613 void
1614 ml_cause_interrupt(void)
1615 {
1616 	return;                 /* BS_XXX */
1617 }
1618 
1619 /* Map memory map IO space */
1620 vm_offset_t
1621 ml_io_map(
1622 	vm_offset_t phys_addr,
1623 	vm_size_t size)
1624 {
1625 	return io_map(phys_addr, size, VM_WIMG_IO, VM_PROT_DEFAULT, false);
1626 }
1627 
1628 /* Map memory map IO space (with protections specified) */
1629 vm_offset_t
1630 ml_io_map_with_prot(
1631 	vm_offset_t phys_addr,
1632 	vm_size_t size,
1633 	vm_prot_t prot)
1634 {
1635 	return io_map(phys_addr, size, VM_WIMG_IO, prot, false);
1636 }
1637 
1638 vm_offset_t
1639 ml_io_map_unmappable(
1640 	vm_offset_t             phys_addr,
1641 	vm_size_t               size,
1642 	unsigned int            flags)
1643 {
1644 	return io_map(phys_addr, size, flags, VM_PROT_DEFAULT, true);
1645 }
1646 
1647 vm_offset_t
1648 ml_io_map_wcomb(
1649 	vm_offset_t phys_addr,
1650 	vm_size_t size)
1651 {
1652 	return io_map(phys_addr, size, VM_WIMG_WCOMB, VM_PROT_DEFAULT, false);
1653 }
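
/*
 * Illustrative note (not in the original source): the ml_io_map* wrappers
 * above differ only in the memory attributes and protections they hand to
 * io_map().  A hypothetical driver-style use, with made-up names, might be:
 *
 *     vm_offset_t regs = ml_io_map(soc_phys_base, PAGE_SIZE);
 *     ...
 *     ml_io_unmap(regs, PAGE_SIZE);
 *
 * soc_phys_base is a placeholder physical address, not a symbol in this file.
 */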
1654 
1655 void
1656 ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1657 {
1658 	pmap_remove(kernel_pmap, addr, addr + sz);
1659 	kmem_free(kernel_map, addr, sz);
1660 }
1661 
1662 vm_map_address_t
1663 ml_map_high_window(
1664 	vm_offset_t     phys_addr,
1665 	vm_size_t       len)
1666 {
1667 	return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1668 }
1669 
1670 vm_offset_t
1671 ml_static_ptovirt(
1672 	vm_offset_t paddr)
1673 {
1674 	return phystokv(paddr);
1675 }
1676 
1677 vm_offset_t
1678 ml_static_slide(
1679 	vm_offset_t vaddr)
1680 {
1681 	vm_offset_t slid_vaddr = 0;
1682 
1683 #if CONFIG_SPTM
1684 	if ((vaddr >= vm_sptm_offsets.unslid_base) && (vaddr < vm_sptm_offsets.unslid_top)) {
1685 		slid_vaddr = vaddr + vm_sptm_offsets.slide;
1686 	} else if ((vaddr >= vm_txm_offsets.unslid_base) && (vaddr < vm_txm_offsets.unslid_top)) {
1687 		slid_vaddr = vaddr + vm_txm_offsets.slide;
1688 	} else
1689 #endif /* CONFIG_SPTM */
1690 	{
1691 		slid_vaddr = vaddr + vm_kernel_slide;
1692 	}
1693 
1694 	if (!VM_KERNEL_IS_SLID(slid_vaddr)) {
1695 		/* This is only intended for use on static kernel addresses. */
1696 		return 0;
1697 	}
1698 
1699 	return slid_vaddr;
1700 }
1701 
1702 vm_offset_t
1703 ml_static_unslide(
1704 	vm_offset_t vaddr)
1705 {
1706 	if (!VM_KERNEL_IS_SLID(vaddr)) {
1707 		/* This is only intended for use on static kernel addresses. */
1708 		return 0;
1709 	}
1710 
1711 #if CONFIG_SPTM
1712 	/**
1713 	 * Addresses coming from the SPTM and TXM have a different slide than the
1714 	 * rest of the kernel.
1715 	 */
1716 	if ((vaddr >= vm_sptm_offsets.slid_base) && (vaddr < vm_sptm_offsets.slid_top)) {
1717 		return vaddr - vm_sptm_offsets.slide;
1718 	}
1719 
1720 	if ((vaddr >= vm_txm_offsets.slid_base) && (vaddr < vm_txm_offsets.slid_top)) {
1721 		return vaddr - vm_txm_offsets.slide;
1722 	}
1723 #endif /* CONFIG_SPTM */
1724 
1725 	return vaddr - vm_kernel_slide;
1726 }
1727 
1728 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1729 
1730 kern_return_t
1731 ml_static_protect(
1732 	vm_offset_t vaddr, /* kernel virtual address */
1733 	vm_size_t size,
1734 	vm_prot_t new_prot __unused)
1735 {
1736 #if CONFIG_SPTM
1737 	/**
1738 	 * Retype any frames that may be passed to the VM to XNU_DEFAULT.
1739 	 */
1740 	for (vm_offset_t sptm_vaddr_cur = vaddr; sptm_vaddr_cur < trunc_page_64(vaddr + size); sptm_vaddr_cur += PAGE_SIZE) {
1741 		/* Check if this frame is XNU_DEFAULT and only retype it if it is not */
1742 		sptm_paddr_t sptm_paddr_cur = kvtophys_nofail(sptm_vaddr_cur);
1743 		sptm_frame_type_t current_type = sptm_get_frame_type(sptm_paddr_cur);
1744 		if (current_type != XNU_DEFAULT) {
1745 			sptm_retype_params_t retype_params = {.raw = SPTM_RETYPE_PARAMS_NULL};
1746 			sptm_retype(sptm_paddr_cur, current_type, XNU_DEFAULT, retype_params);
1747 		}
1748 	}
1749 
1750 	return KERN_SUCCESS;
1751 #else /* CONFIG_SPTM */
1752 	pt_entry_t    arm_prot = 0;
1753 	pt_entry_t    arm_block_prot = 0;
1754 	vm_offset_t   vaddr_cur;
1755 	ppnum_t       ppn;
1756 	kern_return_t result = KERN_SUCCESS;
1757 
1758 	if (vaddr < physmap_base) {
1759 		panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) physmap_base);
1760 		return KERN_FAILURE;
1761 	}
1762 
1763 	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1764 
1765 	if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1766 		panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1767 	}
1768 	if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
1769 		panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
1770 	}
1771 
1772 	/* Set up the protection bits, and block bits so we can validate block mappings. */
1773 	if (new_prot & VM_PROT_WRITE) {
1774 		arm_prot |= ARM_PTE_AP(AP_RWNA);
1775 		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1776 	} else {
1777 		arm_prot |= ARM_PTE_AP(AP_RONA);
1778 		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1779 	}
1780 
1781 	arm_prot |= ARM_PTE_NX;
1782 	arm_block_prot |= ARM_TTE_BLOCK_NX;
1783 
1784 	if (!(new_prot & VM_PROT_EXECUTE)) {
1785 		arm_prot |= ARM_PTE_PNX;
1786 		arm_block_prot |= ARM_TTE_BLOCK_PNX;
1787 	}
1788 
1789 	for (vaddr_cur = vaddr;
1790 	    vaddr_cur < trunc_page_64(vaddr + size);
1791 	    vaddr_cur += PAGE_SIZE) {
1792 		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1793 		if (ppn != (vm_offset_t) NULL) {
1794 			tt_entry_t      *tte2;
1795 			pt_entry_t      *pte_p;
1796 			pt_entry_t      ptmp;
1797 
1798 #if XNU_MONITOR
1799 			assert(!pmap_is_monitor(ppn));
1800 			assert(!TEST_PAGE_RATIO_4);
1801 #endif
1802 
1803 			tte2 = arm_kva_to_tte(vaddr_cur);
1804 
1805 			if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1806 				if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1807 				    ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1808 					/*
1809 					 * We can support ml_static_protect on a block mapping if the mapping already has
1810 					 * the desired protections.  We still want to run checks on a per-page basis.
1811 					 */
1812 					continue;
1813 				}
1814 
1815 				result = KERN_FAILURE;
1816 				break;
1817 			}
1818 
1819 			pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1820 			ptmp = *pte_p;
1821 
1822 			if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1823 				/*
1824 				 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1825 				 * protections do not match the desired protections, then we will fail (as we cannot update
1826 				 * this mapping without updating other mappings as well).
1827 				 */
1828 				result = KERN_FAILURE;
1829 				break;
1830 			}
1831 
1832 			__unreachable_ok_push
1833 			if (TEST_PAGE_RATIO_4) {
1834 				{
1835 					unsigned int    i;
1836 					pt_entry_t      *ptep_iter;
1837 
1838 					ptep_iter = pte_p;
1839 					for (i = 0; i < 4; i++, ptep_iter++) {
1840 						/* Note that there is a hole in the HINT sanity checking here. */
1841 						ptmp = *ptep_iter;
1842 
1843 						/* We only need to update the page tables if the protections do not match. */
1844 						if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1845 							ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1846 							*ptep_iter = ptmp;
1847 						}
1848 					}
1849 				}
1850 			} else {
1851 				ptmp = *pte_p;
1852 				/* We only need to update the page tables if the protections do not match. */
1853 				if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1854 					ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1855 					*pte_p = ptmp;
1856 				}
1857 			}
1858 			__unreachable_ok_pop
1859 		}
1860 	}
1861 
1862 	if (vaddr_cur > vaddr) {
1863 		assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1864 		flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1865 	}
1866 
1867 
1868 	return result;
1869 #endif /* CONFIG_SPTM */
1870 }
1871 
1872 #if defined(CONFIG_SPTM)
1873 /*
1874  * Returns true if the given physical address is in one of the boot kernelcache ranges.
1875  */
1876 static bool
1877 ml_physaddr_in_bootkc_range(vm_offset_t physaddr)
1878 {
1879 	for (int i = 0; i < arm_vm_kernelcache_numranges; i++) {
1880 		if (physaddr >= arm_vm_kernelcache_ranges[i].start_phys && physaddr < arm_vm_kernelcache_ranges[i].end_phys) {
1881 			return true;
1882 		}
1883 	}
1884 	return false;
1885 }
1886 #endif /* defined(CONFIG_SPTM) */
1887 
1888 /*
1889  *	Routine:        ml_static_mfree
1890  *	Function:
1891  */
1892 void
1893 ml_static_mfree(
1894 	vm_offset_t vaddr,
1895 	vm_size_t   size)
1896 {
1897 	vm_offset_t vaddr_cur;
1898 	vm_offset_t paddr_cur;
1899 	ppnum_t     ppn;
1900 	uint32_t    freed_pages = 0;
1901 	uint32_t    freed_kernelcache_pages = 0;
1902 
1903 
1904 	/* It is acceptable (if bad) to fail to free. */
1905 	if (vaddr < physmap_base) {
1906 		return;
1907 	}
1908 
1909 	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1910 
1911 	for (vaddr_cur = vaddr;
1912 	    vaddr_cur < trunc_page_64(vaddr + size);
1913 	    vaddr_cur += PAGE_SIZE) {
1914 		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1915 		if (ppn != (vm_offset_t) NULL) {
1916 			/*
1917 			 * It is not acceptable to fail to update the protections on a page
1918 			 * we will release to the VM.  We need to either panic or continue.
1919 			 * For now, we'll panic (to help flag if there is memory we can
1920 			 * reclaim).
1921 			 */
1922 			if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1923 				panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1924 			}
1925 
1926 			paddr_cur = ptoa(ppn);
1927 
1928 
1929 			vm_page_create(ppn, (ppn + 1));
1930 			freed_pages++;
1931 #if defined(CONFIG_SPTM)
1932 			if (ml_physaddr_in_bootkc_range(paddr_cur)) {
1933 #else
1934 			if (paddr_cur >= arm_vm_kernelcache_phys_start && paddr_cur < arm_vm_kernelcache_phys_end) {
1935 #endif
1936 				freed_kernelcache_pages++;
1937 			}
1938 		}
1939 	}
1940 	vm_page_lockspin_queues();
1941 	vm_page_wire_count -= freed_pages;
1942 	vm_page_wire_count_initial -= freed_pages;
1943 	vm_page_kernelcache_count -= freed_kernelcache_pages;
1944 	vm_page_unlock_queues();
1945 #if     DEBUG
1946 	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
1947 #endif
1948 }
1949 
1950 /*
1951  * Routine: ml_page_protection_type
1952  * Function: Returns the type of page protection that the system supports.
1953  */
1954 ml_page_protection_t
1955 ml_page_protection_type(void)
1956 {
1957 #if CONFIG_SPTM
1958 	return 2;
1959 #elif XNU_MONITOR
1960 	return 1;
1961 #else
1962 	return 0;
1963 #endif
1964 }
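
/*
 * Illustrative note (not in the original source): per the #if branches above,
 * the return value distinguishes the page-protection model of the build:
 * 2 when the SPTM owns the page tables, 1 when the PPL (XNU_MONITOR) does,
 * and 0 for a traditional build where xnu manages its own page tables.
 */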
1965 
1966 /* virtual to physical on wired pages */
1967 vm_offset_t
1968 ml_vtophys(vm_offset_t vaddr)
1969 {
1970 	return kvtophys(vaddr);
1971 }
1972 
1973 /*
1974  * Routine: ml_nofault_copy
1975  * Function: Perform a physical mode copy if the source and destination have
1976  * valid translations in the kernel pmap. If translations are present, they are
1977  * assumed to be wired; e.g., no attempt is made to guarantee that the
1978  * translations obtained remain valid for the duration of the copy process.
1979  */
1980 vm_size_t
1981 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1982 {
1983 	addr64_t        cur_phys_dst, cur_phys_src;
1984 	vm_size_t       count, nbytes = 0;
1985 
1986 	while (size > 0) {
1987 		if (!(cur_phys_src = kvtophys(virtsrc))) {
1988 			break;
1989 		}
1990 		if (!(cur_phys_dst = kvtophys(virtdst))) {
1991 			break;
1992 		}
1993 		if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1994 		    !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1995 			break;
1996 		}
1997 		count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1998 		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1999 			count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
2000 		}
2001 		if (count > size) {
2002 			count = size;
2003 		}
2004 
2005 		bcopy_phys(cur_phys_src, cur_phys_dst, count);
2006 
2007 		nbytes += count;
2008 		virtsrc += count;
2009 		virtdst += count;
2010 		size -= count;
2011 	}
2012 
2013 	return nbytes;
2014 }
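
/*
 * Illustrative usage sketch (not in the original source): a caller that must
 * not fault can use ml_nofault_copy() and treat a short return count as
 * "stop here":
 *
 *     char buf[64];
 *     vm_size_t done = ml_nofault_copy(kva, (vm_offset_t)buf, sizeof(buf));
 *     if (done < sizeof(buf)) {
 *         // some page in the range had no valid kernel translation
 *     }
 *
 * kva is a placeholder kernel virtual address, not a symbol in this file.
 */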
2015 
2016 /*
2017  *	Routine:        ml_validate_nofault
2018  *	Function: Validate that this address range has valid translations
2019  *			in the kernel pmap.  If translations are present, they are
2020  *			assumed to be wired; i.e. no attempt is made to guarantee
2021  *			that the translations persist after the check.
2022  *  Returns: TRUE if the range is mapped and will not cause a fault,
2023  *			FALSE otherwise.
2024  */
2025 
2026 boolean_t
2027 ml_validate_nofault(
2028 	vm_offset_t virtsrc, vm_size_t size)
2029 {
2030 	addr64_t cur_phys_src;
2031 	uint32_t count;
2032 
2033 	while (size > 0) {
2034 		if (!(cur_phys_src = kvtophys(virtsrc))) {
2035 			return FALSE;
2036 		}
2037 		if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
2038 			return FALSE;
2039 		}
2040 		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
2041 		if (count > size) {
2042 			count = (uint32_t)size;
2043 		}
2044 
2045 		virtsrc += count;
2046 		size -= count;
2047 	}
2048 
2049 	return TRUE;
2050 }
2051 
2052 void
2053 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
2054 {
2055 	*phys_addr = 0;
2056 	*size = 0;
2057 }
2058 
2059 void
2060 active_rt_threads(__unused boolean_t active)
2061 {
2062 }
2063 
2064 static void
2065 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
2066 {
2067 	return;
2068 }
2069 
2070 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
2071 
2072 void
2073 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
2074 {
2075 	if (cpu_qos_cb != NULL) {
2076 		cpu_qos_update = cpu_qos_cb;
2077 	} else {
2078 		cpu_qos_update = cpu_qos_cb_default;
2079 	}
2080 }
2081 
2082 void
2083 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
2084 {
2085 	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
2086 
2087 	cpu_qos_update((int)urgency, rt_period, rt_deadline);
2088 
2089 	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
2090 }
2091 
2092 void
2093 machine_run_count(__unused uint32_t count)
2094 {
2095 }
2096 
2097 processor_t
2098 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
2099 {
2100 	return processor;
2101 }
2102 
2103 #if KASAN
2104 vm_offset_t ml_stack_base(void);
2105 vm_size_t ml_stack_size(void);
2106 
2107 vm_offset_t
2108 ml_stack_base(void)
2109 {
2110 	uintptr_t local = (uintptr_t) &local;
2111 	vm_offset_t     intstack_top_ptr;
2112 
2113 	intstack_top_ptr = getCpuDatap()->intstack_top;
2114 	if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2115 		return intstack_top_ptr - INTSTACK_SIZE;
2116 	} else {
2117 		return current_thread()->kernel_stack;
2118 	}
2119 }
2120 vm_size_t
2121 ml_stack_size(void)
2122 {
2123 	uintptr_t local = (uintptr_t) &local;
2124 	vm_offset_t     intstack_top_ptr;
2125 
2126 	intstack_top_ptr = getCpuDatap()->intstack_top;
2127 	if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2128 		return INTSTACK_SIZE;
2129 	} else {
2130 		return kernel_stack_size;
2131 	}
2132 }
2133 #endif
2134 
2135 #ifdef CONFIG_KCOV
2136 
2137 kcov_cpu_data_t *
2138 current_kcov_data(void)
2139 {
2140 	return &current_cpu_datap()->cpu_kcov_data;
2141 }
2142 
2143 kcov_cpu_data_t *
2144 cpu_kcov_data(int cpuid)
2145 {
2146 	return &cpu_datap(cpuid)->cpu_kcov_data;
2147 }
2148 
2149 #endif /* CONFIG_KCOV */
2150 
2151 boolean_t
2152 machine_timeout_suspended(void)
2153 {
2154 	return FALSE;
2155 }
2156 
2157 kern_return_t
2158 ml_interrupt_prewarm(__unused uint64_t deadline)
2159 {
2160 	return KERN_FAILURE;
2161 }
2162 
2163 /*
2164  * Assumes fiq, irq disabled.
2165  */
2166 void
2167 ml_set_decrementer(uint32_t dec_value)
2168 {
2169 	cpu_data_t      *cdp = getCpuDatap();
2170 
2171 	assert(ml_get_interrupts_enabled() == FALSE);
2172 	cdp->cpu_decrementer = dec_value;
2173 
2174 	if (cdp->cpu_set_decrementer_func) {
2175 		cdp->cpu_set_decrementer_func(dec_value);
2176 	} else {
2177 		__builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
2178 	}
2179 }
2180 
2181 /**
2182  * Perform a read of the timebase which is permitted to be executed
2183  * speculatively and/or out of program order.
2184  */
2185 static inline uint64_t
2186 speculative_timebase(void)
2187 {
2188 	return __builtin_arm_rsr64("CNTVCT_EL0");
2189 }
2190 
2191 /**
2192  * Read a non-speculative view of the timebase if one is available,
2193  * otherwise fall back on an ISB to prevent speculation and
2194  * enforce ordering.
2195  */
2196 static inline uint64_t
2197 nonspeculative_timebase(void)
2198 {
2199 #if defined(HAS_ACNTVCT)
2200 	return __builtin_arm_rsr64("ACNTVCT_EL0");
2201 #elif __ARM_ARCH_8_6__
2202 	return __builtin_arm_rsr64("CNTVCTSS_EL0");
2203 #else
2204 	// ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
2205 	// "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
2206 	// to other instructions executed on the same processor."
2207 	__builtin_arm_isb(ISB_SY);
2208 	return speculative_timebase();
2209 #endif
2210 }
2211 
2212 
2213 uint64_t
2214 ml_get_hwclock()
2215 {
2216 	uint64_t timebase = nonspeculative_timebase();
2217 	return timebase;
2218 }
2219 
2220 uint64_t
2221 ml_get_timebase()
2222 {
2223 	uint64_t clock, timebase;
2224 
2225 	//the retry is for the case where S2R catches us in the middle of this. see rdar://77019633
2226 	do {
2227 		timebase = getCpuDatap()->cpu_base_timebase;
2228 		os_compiler_barrier();
2229 		clock = ml_get_hwclock();
2230 		os_compiler_barrier();
2231 	} while (getCpuDatap()->cpu_base_timebase != timebase);
2232 
2233 	return clock + timebase;
2234 }
2235 
2236 /**
2237  * Issue a barrier that guarantees all prior memory accesses will complete
2238  * before any subsequent timebase reads.
2239  */
2240 void
2241 ml_memory_to_timebase_fence(void)
2242 {
2243 	__builtin_arm_dmb(DMB_SY);
2244 	const uint64_t take_backwards_branch = 0;
2245 	asm volatile (
2246         "1:"
2247                 "ldr	x0, [%[take_backwards_branch]]" "\n"
2248                 "cbnz	x0, 1b"                         "\n"
2249                 :
2250                 : [take_backwards_branch] "r"(&take_backwards_branch)
2251                 : "x0"
2252         );
2253 
2254 	/* throwaway read to prevent ml_get_speculative_timebase() reordering */
2255 	(void)ml_get_hwclock();
2256 }
2257 
2258 /**
2259  * Issue a barrier that guarantees all prior timebase reads will
2260  * be ordered before any subsequent memory accesses.
2261  */
2262 void
2263 ml_timebase_to_memory_fence(void)
2264 {
2265 	__builtin_arm_isb(ISB_SY);
2266 }
2267 
2268 /*
2269  * Get the speculative timebase without an ISB.
2270  */
2271 uint64_t
2272 ml_get_speculative_timebase(void)
2273 {
2274 	uint64_t clock, timebase;
2275 
2276 	//the retry is for the case where S2R catches us in the middle of this. see rdar://77019633&77697482
2277 	do {
2278 		timebase = getCpuDatap()->cpu_base_timebase;
2279 		os_compiler_barrier();
2280 		clock = speculative_timebase();
2281 
2282 		os_compiler_barrier();
2283 	} while (getCpuDatap()->cpu_base_timebase != timebase);
2284 
2285 	return clock + timebase;
2286 }
2287 
2288 uint64_t
2289 ml_get_timebase_entropy(void)
2290 {
2291 	return ml_get_speculative_timebase();
2292 }
2293 
2294 uint32_t
2295 ml_get_decrementer(void)
2296 {
2297 	cpu_data_t *cdp = getCpuDatap();
2298 	uint32_t dec;
2299 
2300 	assert(ml_get_interrupts_enabled() == FALSE);
2301 
2302 	if (cdp->cpu_get_decrementer_func) {
2303 		dec = cdp->cpu_get_decrementer_func();
2304 	} else {
2305 		uint64_t wide_val;
2306 
2307 		wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
2308 		dec = (uint32_t)wide_val;
2309 		assert(wide_val == (uint64_t)dec);
2310 	}
2311 
2312 	return dec;
2313 }
2314 
2315 boolean_t
2316 ml_get_timer_pending(void)
2317 {
2318 	uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
2319 	return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
2320 }
2321 
2322 __attribute__((noreturn))
2323 void
2324 platform_syscall(arm_saved_state_t *state)
2325 {
2326 	uint32_t code;
2327 
2328 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
2329 
2330 	code = (uint32_t)get_saved_state_reg(state, 3);
2331 
2332 	KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_START,
2333 	    get_saved_state_reg(state, 0),
2334 	    get_saved_state_reg(state, 1),
2335 	    get_saved_state_reg(state, 2));
2336 
2337 	switch (code) {
2338 	case 2:
2339 		/* set cthread */
2340 		platform_syscall_kprintf("set cthread self.\n");
2341 		thread_set_cthread_self(get_saved_state_reg(state, 0));
2342 		break;
2343 	case 3:
2344 		/* get cthread */
2345 		platform_syscall_kprintf("get cthread self.\n");
2346 		set_user_saved_state_reg(state, 0, thread_get_cthread_self());
2347 		break;
2348 	case 0: /* I-Cache flush (removed) */
2349 	case 1: /* D-Cache flush (removed) */
2350 	default:
2351 		platform_syscall_kprintf("unknown: %d\n", code);
2352 		break;
2353 	}
2354 
2355 	KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_END,
2356 	    get_saved_state_reg(state, 0));
2357 
2358 	thread_exception_return();
2359 }
2360 
2361 static void
2362 _enable_timebase_event_stream(uint32_t bit_index)
2363 {
2364 	uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2365 
2366 	if (bit_index >= 64) {
2367 		panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2368 	}
2369 
2370 	__asm__ volatile ("mrs	%0, CNTKCTL_EL1" : "=r"(cntkctl));
2371 
2372 	cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2373 	cntkctl |= CNTKCTL_EL1_EVNTEN;
2374 	cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */
2375 
2376 	/*
2377 	 * If the SOC supports it (and it isn't broken), enable
2378 	 * EL0 access to the timebase registers.
2379 	 */
2380 	if (user_timebase_type() != USER_TIMEBASE_NONE) {
2381 		cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2382 	}
2383 
2384 	__builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
2385 }
2386 
2387 /*
2388  * Turn timer on, unmask that interrupt.
2389  */
2390 static void
2391 _enable_virtual_timer(void)
2392 {
2393 	uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
2394 
2395 	__builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2396 	/* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2397 	__builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2398 }
2399 
2400 void
2401 fiq_context_init(boolean_t enable_fiq __unused)
2402 {
2403 	/* Interrupts still disabled. */
2404 	assert(ml_get_interrupts_enabled() == FALSE);
2405 	_enable_virtual_timer();
2406 }
2407 
2408 void
2409 wfe_timeout_init(void)
2410 {
2411 	_enable_timebase_event_stream(arm64_eventi);
2412 }
2413 
2414 /**
2415  * Configures, but does not enable, the WFE event stream. The event stream
2416  * generates an event at a set interval to act as a timeout for WFEs.
2417  *
2418  * This function sets the static global variable arm64_eventi to be the proper
2419  * bit index for the CNTKCTL_EL1.EVENTI field to generate events at the correct
2420  * period (1us unless specified by the "wfe_events_sec" boot-arg). arm64_eventi
2421  * is used by wfe_timeout_init to actually poke the registers and enable the
2422  * event stream.
2423  *
2424  * The CNTKCTL_EL1.EVENTI field contains the index of the bit of CNTVCT_EL0 that
2425  * is the trigger for the system to generate an event. The trigger can occur on
2426  * either the rising or falling edge of the bit depending on the value of
2427  * CNTKCTL_EL1.EVNTDIR. This is arbitrary for our purposes, so we use the
2428  * falling edge (1->0) transition to generate events.
2429  */
2430 void
2431 wfe_timeout_configure(void)
2432 {
2433 	/* Could fill in our own ops here, if we needed them */
2434 	uint64_t        ticks_per_sec, ticks_per_event, events_per_sec = 0;
2435 	uint32_t        bit_index;
2436 
2437 	if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2438 		if (events_per_sec <= 0) {
2439 			events_per_sec = 1;
2440 		} else if (events_per_sec > USEC_PER_SEC) {
2441 			events_per_sec = USEC_PER_SEC;
2442 		}
2443 	} else {
2444 		events_per_sec = USEC_PER_SEC;
2445 	}
2446 	ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2447 	ticks_per_event = ticks_per_sec / events_per_sec;
2448 
2449 	/* Bit index of next power of two greater than ticks_per_event */
2450 	bit_index = flsll(ticks_per_event) - 1;
2451 	/* Round up to next power of two if ticks_per_event is initially power of two */
2452 	if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2453 		bit_index++;
2454 	}
2455 
2456 	/*
2457 	 * The timer can only trigger on rising or falling edge, not both; we don't
2458 	 * care which we trigger on, but we do need to adjust which bit we are
2459 	 * interested in to account for this.
2460 	 *
2461 	 * In particular, we set CNTKCTL_EL1.EVENTDIR to trigger events on the
2462 	 * falling edge of the given bit. Therefore, we must decrement the bit index
2463 	 * by one as when the bit before the one we care about makes a 1 -> 0
2464 	 * transition, the bit we care about makes a 0 -> 1 transition.
2465 	 *
2466 	 * For example if we want an event generated every 8 ticks (if we calculated
2467 	 * a bit_index of 3), we would want the event to be generated whenever the
2468 	 * lower four bits of the counter transition from 0b0111 -> 0b1000. We can
2469 	 * see that the bit at index 2 makes a falling transition in this scenario,
2470 	 * so we would want EVENTI to be 2 instead of 3.
2471 	 */
2472 	if (bit_index != 0) {
2473 		bit_index--;
2474 	}
2475 
2476 	arm64_eventi = bit_index;
2477 }
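
/*
 * Worked example (not in the original source, assuming a 24 MHz timebase and
 * the default 1us event period): ticks_per_event = 24000000 / 1000000 = 24;
 * flsll(24) - 1 = 4; 24 is not a power of two, so bit_index becomes 5; after
 * the EVENTDIR adjustment above, EVENTI = 4, i.e. an event fires every
 * 2^5 = 32 timebase ticks (~1.3us), the smallest power-of-two period that is
 * no shorter than the requested one.
 */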
2478 
2479 boolean_t
2480 ml_delay_should_spin(uint64_t interval)
2481 {
2482 	cpu_data_t     *cdp = getCpuDatap();
2483 
2484 	if (cdp->cpu_idle_latency) {
2485 		return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2486 	} else {
2487 		/*
2488 		 * Early boot, latency is unknown. Err on the side of blocking,
2489 		 * which should always be safe, even if slow
2490 		 */
2491 		return FALSE;
2492 	}
2493 }
2494 
2495 boolean_t
2496 ml_thread_is64bit(thread_t thread)
2497 {
2498 	return thread_is_64bit_addr(thread);
2499 }
2500 
2501 void
2502 ml_delay_on_yield(void)
2503 {
2504 #if DEVELOPMENT || DEBUG
2505 	if (yield_delay_us) {
2506 		delay(yield_delay_us);
2507 	}
2508 #endif
2509 }
2510 
2511 void
2512 ml_timer_evaluate(void)
2513 {
2514 }
2515 
2516 boolean_t
2517 ml_timer_forced_evaluation(void)
2518 {
2519 	return FALSE;
2520 }
2521 
2522 void
2523 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2524 {
2525 	/*
2526 	 * For now: update the resource coalition stats of the
2527 	 * current thread's coalition
2528 	 */
2529 	task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2530 }
2531 
2532 uint64_t
2533 ml_gpu_stat(__unused thread_t t)
2534 {
2535 	return 0;
2536 }
2537 
2538 thread_t
2539 current_thread(void)
2540 {
2541 	return current_thread_fast();
2542 }
2543 
2544 #if defined(HAS_APPLE_PAC)
2545 uint8_t
2546 ml_task_get_disable_user_jop(task_t task)
2547 {
2548 	assert(task);
2549 	return task->disable_user_jop;
2550 }
2551 
2552 void
2553 ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2554 {
2555 	assert(task);
2556 	task->disable_user_jop = disable_user_jop;
2557 }
2558 
2559 void
2560 ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2561 {
2562 	assert(thread);
2563 	if (disable_user_jop) {
2564 		thread->machine.arm_machine_flags |= ARM_MACHINE_THREAD_DISABLE_USER_JOP;
2565 	} else {
2566 		thread->machine.arm_machine_flags &= ~ARM_MACHINE_THREAD_DISABLE_USER_JOP;
2567 	}
2568 }
2569 
2570 void
2571 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2572 {
2573 	if (inherit) {
2574 		task->rop_pid = parent_task->rop_pid;
2575 	} else {
2576 		task->rop_pid = early_random();
2577 	}
2578 }
2579 
2580 /**
2581  * jop_pid may be inherited from the parent task or generated inside the shared
2582  * region.  Unfortunately these two parameters are available at very different
2583  * times during task creation, so we need to split this into two steps.
2584  */
2585 void
2586 ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit, boolean_t disable_user_jop)
2587 {
2588 	if (inherit) {
2589 		task->jop_pid = parent_task->jop_pid;
2590 	} else if (disable_user_jop) {
2591 		task->jop_pid = ml_non_arm64e_user_jop_pid();
2592 	} else {
2593 		task->jop_pid = ml_default_jop_pid();
2594 	}
2595 }
2596 
2597 void
2598 ml_task_set_jop_pid_from_shared_region(task_t task, boolean_t disable_user_jop)
2599 {
2600 	if (disable_user_jop) {
2601 		task->jop_pid = ml_non_arm64e_user_jop_pid();
2602 		return;
2603 	}
2604 
2605 	vm_shared_region_t sr = vm_shared_region_get(task);
2606 	/*
2607 	 * If there's no shared region, we can assign the key arbitrarily.  This
2608 	 * typically happens when Mach-O image activation failed part of the way
2609 	 * through, and this task is in the middle of dying with SIGKILL anyway.
2610 	 */
2611 	if (__improbable(!sr)) {
2612 		task->jop_pid = early_random();
2613 		return;
2614 	}
2615 	vm_shared_region_deallocate(sr);
2616 
2617 	/*
2618 	 * Similarly we have to worry about jetsam having killed the task and
2619 	 * already cleared the shared_region_id.
2620 	 */
2621 	task_lock(task);
2622 	if (task->shared_region_id != NULL) {
2623 		task->jop_pid = shared_region_find_key(task->shared_region_id);
2624 	} else {
2625 		task->jop_pid = early_random();
2626 	}
2627 	task_unlock(task);
2628 }
2629 
2630 void
2631 ml_thread_set_jop_pid(thread_t thread, task_t task)
2632 {
2633 	thread->machine.jop_pid = task->jop_pid;
2634 }
2635 #endif /* defined(HAS_APPLE_PAC) */
2636 
2637 #if DEVELOPMENT || DEBUG
2638 static uint64_t minor_badness_suffered = 0;
2639 #endif
2640 void
2641 ml_report_minor_badness(uint32_t __unused badness_id)
2642 {
2643 	#if DEVELOPMENT || DEBUG
2644 	(void)os_atomic_or(&minor_badness_suffered, 1ULL << badness_id, relaxed);
2645 	#endif
2646 }
2647 
2648 #if defined(HAS_APPLE_PAC)
2649 #if __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM
2650 /**
2651  * The ARMv8.6 implementation is also safe for non-FPAC CPUs, but less efficient;
2652  * guest kernels need to use it because they do not know at compile time whether
2653  * the host CPU supports FPAC.
2654  */
2655 
2656 /**
2657  * Emulates the poisoning done by ARMv8.3-PAuth instructions on auth failure.
2658  */
2659 static void *
2660 ml_poison_ptr(void *ptr, ptrauth_key key)
2661 {
2662 	bool b_key = key & (1ULL << 0);
2663 	uint64_t error_code;
2664 	if (b_key) {
2665 		error_code = 2;
2666 	} else {
2667 		error_code = 1;
2668 	}
2669 
2670 	bool kernel_pointer = (uintptr_t)ptr & (1ULL << 55);
2671 	bool data_key = key & (1ULL << 1);
2672 	/* When PAC is enabled, only userspace data pointers use TBI, regardless of boot parameters */
2673 	bool tbi = data_key && !kernel_pointer;
2674 	unsigned int poison_shift;
2675 	if (tbi) {
2676 		poison_shift = 53;
2677 	} else {
2678 		poison_shift = 61;
2679 	}
2680 
2681 	uintptr_t poisoned = (uintptr_t)ptr;
2682 	poisoned &= ~(3ULL << poison_shift);
2683 	poisoned |= error_code << poison_shift;
2684 	return (void *)poisoned;
2685 }
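
/*
 * Worked example (not in the original source): failing to authenticate a
 * kernel data pointer with the DA key poisons it as follows.  DA is an "A"
 * key, so error_code = 1; bit 55 is set for kernel pointers and data-key TBI
 * only applies to user pointers, so poison_shift = 61; the result is the
 * stripped pointer with bits [62:61] forced to 0b01.
 */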
2686 
2687 /*
2688  * ptrauth_sign_unauthenticated() reimplemented using asm volatile, forcing the
2689  * compiler to assume this operation has side-effects and cannot be reordered
2690  */
2691 #define ptrauth_sign_volatile(__value, __suffix, __data)                \
2692 	({                                                              \
2693 	        void *__ret = __value;                                  \
2694 	        asm volatile (                                          \
2695 	                "pac" #__suffix "	%[value], %[data]"          \
2696 	                : [value] "+r"(__ret)                           \
2697 	                : [data] "r"(__data)                            \
2698 	        );                                                      \
2699 	        __ret;                                                  \
2700 	})
2701 
2702 #define ml_auth_ptr_unchecked_for_key(_ptr, _suffix, _key, _modifier)                           \
2703 	do {                                                                                    \
2704 	        void *stripped = ptrauth_strip(_ptr, _key);                                     \
2705 	        void *reauthed = ptrauth_sign_volatile(stripped, _suffix, _modifier);           \
2706 	        if (__probable(_ptr == reauthed)) {                                             \
2707 	                _ptr = stripped;                                                        \
2708 	        } else {                                                                        \
2709 	                _ptr = ml_poison_ptr(stripped, _key);                                   \
2710 	        }                                                                               \
2711 	} while (0)
2712 
2713 #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
2714 	ml_auth_ptr_unchecked_for_key(_ptr, _suffix, ptrauth_key_as ## _suffix, _modifier)
2715 #else
2716 #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
2717 	asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
2718 #endif /* __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM */
2719 
2720 /**
2721  * Authenticates a signed pointer without trapping on failure.
2722  *
2723  * @warning This function must be called with interrupts disabled.
2724  *
2725  * @warning Pointer authentication failure should normally be treated as a fatal
2726  * error.  This function is intended for a handful of callers that cannot panic
2727  * on failure, and that understand the risks in handling a poisoned return
2728  * value.  Other code should generally use the trapping variant
2729  * ptrauth_auth_data() instead.
2730  *
2731  * @param ptr the pointer to authenticate
2732  * @param key which key to use for authentication
2733  * @param modifier a modifier to mix into the key
2734  * @return an authenticated version of ptr, possibly with poison bits set
2735  */
2736 void *
2737 ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2738 {
2739 	switch (key & 0x3) {
2740 	case ptrauth_key_asia:
2741 		_ml_auth_ptr_unchecked(ptr, ia, modifier);
2742 		break;
2743 	case ptrauth_key_asib:
2744 		_ml_auth_ptr_unchecked(ptr, ib, modifier);
2745 		break;
2746 	case ptrauth_key_asda:
2747 		_ml_auth_ptr_unchecked(ptr, da, modifier);
2748 		break;
2749 	case ptrauth_key_asdb:
2750 		_ml_auth_ptr_unchecked(ptr, db, modifier);
2751 		break;
2752 	}
2753 
2754 	return ptr;
2755 }
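
/*
 * Illustrative usage sketch (not in the original source): because the routine
 * above returns the stripped pointer on success and a poisoned pointer on
 * failure, a non-panicking caller can detect failure by comparison:
 *
 *     void *p = ml_auth_ptr_unchecked(signed_ptr, ptrauth_key_asda, modifier);
 *     if (p != ptrauth_strip(signed_ptr, ptrauth_key_asda)) {
 *         // authentication failed; p carries poison bits
 *     }
 *
 * signed_ptr and modifier are placeholders, not symbols defined in this file.
 */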
2756 #endif /* defined(HAS_APPLE_PAC) */
2757 
2758 #ifdef CONFIG_XNUPOST
2759 void
2760 ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2761 {
2762 	thread_t thread = current_thread();
2763 	thread->machine.expected_fault_handler = expected_fault_handler;
2764 	thread->machine.expected_fault_addr = expected_fault_addr;
2765 	thread->machine.expected_fault_pc = 0;
2766 }
2767 
2768 /** Expect an exception to be thrown at EXPECTED_FAULT_PC */
2769 void
2770 ml_expect_fault_pc_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_pc)
2771 {
2772 	thread_t thread = current_thread();
2773 	thread->machine.expected_fault_handler = expected_fault_handler;
2774 	thread->machine.expected_fault_addr = 0;
2775 	uintptr_t raw_func = (uintptr_t)ptrauth_strip(
2776 		(void *)expected_fault_pc,
2777 		ptrauth_key_function_pointer);
2778 	thread->machine.expected_fault_pc = raw_func;
2779 }
2780 
2781 void
2782 ml_expect_fault_end(void)
2783 {
2784 	thread_t thread = current_thread();
2785 	thread->machine.expected_fault_handler = NULL;
2786 	thread->machine.expected_fault_addr = 0;
2787 	thread->machine.expected_fault_pc = 0;
2788 }
2789 #endif /* CONFIG_XNUPOST */
2790 
2791 void
2792 ml_hibernate_active_pre(void)
2793 {
2794 #if HIBERNATION
2795 	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2796 
2797 		hibernate_rebuild_vm_structs();
2798 	}
2799 #endif /* HIBERNATION */
2800 }
2801 
2802 void
2803 ml_hibernate_active_post(void)
2804 {
2805 #if HIBERNATION
2806 	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2807 		hibernate_machine_init();
2808 		hibernate_vm_lock_end();
2809 		current_cpu_datap()->cpu_hibernate = 0;
2810 	}
2811 #endif /* HIBERNATION */
2812 }
2813 
2814 /**
2815  * Return a machine-dependent array of address space regions that should be
2816  * reserved by the VM (pre-mapped in the address space). This will prevent user
2817  * processes from allocating or deallocating from within these regions.
2818  *
2819  * @param vm_is64bit True if the process has a 64-bit address space.
2820  * @param regions An out parameter representing an array of regions to reserve.
2821  *
2822  * @return The number of reserved regions returned through `regions`.
2823  */
2824 size_t
2825 ml_get_vm_reserved_regions(bool vm_is64bit, const struct vm_reserved_region **regions)
2826 {
2827 	assert(regions != NULL);
2828 
2829 	/**
2830 	 * Reserved regions only apply to 64-bit address spaces. This is because
2831 	 * we only expect to grow the maximum user VA address on 64-bit address spaces
2832 	 * (we've essentially already reached the max for 32-bit spaces). The reserved
2833 	 * regions should safely fall outside of the max user VA for 32-bit processes.
2834 	 */
2835 	if (vm_is64bit) {
2836 		*regions = vm_reserved_regions;
2837 		return ARRAY_COUNT(vm_reserved_regions);
2838 	} else {
2839 		/* Don't reserve any VA regions on arm64_32 processes. */
2840 		*regions = NULL;
2841 		return 0;
2842 	}
2843 }
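
/*
 * Illustrative usage sketch (not in the original source): a 64-bit caller in
 * the VM layer would consume the out-parameter like this:
 *
 *     const struct vm_reserved_region *rr;
 *     size_t n = ml_get_vm_reserved_regions(true, &rr);
 *     for (size_t i = 0; i < n; i++) {
 *         // pre-map / reserve rr[i] in the new address space
 *     }
 */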
2844 
2845 /* These WFE recommendations are expected to be updated on a relatively
2846  * infrequent cadence, possibly from a different cluster, hence
2847  * false cacheline sharing isn't expected to be material
2848  */
2849 static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2850 
2851 uint32_t
2852 ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2853 {
2854 	assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2855 	assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2856 	os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2857 	return 0; /* Success */
2858 }
2859 
2860 #if DEVELOPMENT || DEBUG
2861 int wfe_rec_max = 0;
2862 int wfe_rec_none = 0;
2863 uint64_t wfe_rec_override_mat = 0;
2864 uint64_t wfe_rec_clamp = 0;
2865 #endif
2866 
2867 uint64_t
2868 ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2869 {
2870 	/* This and its consumer do not synchronize vis-a-vis updates
2871 	 * of the recommendation; races are acceptable.
2872 	 */
2873 	uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2874 #if DEVELOPMENT || DEBUG
2875 	if (wfe_rec_clamp) {
2876 		wfet = MIN(wfe_rec_clamp, wfet);
2877 	}
2878 
2879 	if (wfe_rec_max) {
2880 		for (int i = 0; i < MAX_CPU_CLUSTERS; i++) {
2881 			if (arm64_cluster_wfe_recs[i] > wfet) {
2882 				wfet = arm64_cluster_wfe_recs[i];
2883 			}
2884 		}
2885 	}
2886 
2887 	if (wfe_rec_none) {
2888 		wfet = 0;
2889 	}
2890 
2891 	if (wfe_rec_override_mat) {
2892 		wfet = wfe_rec_override_mat;
2893 	}
2894 #endif
2895 	return wfet;
2896 }
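
/*
 * Illustrative note (not in the original source): a hypothetical idle/spin
 * loop would treat the value returned above as how long a WFE may park the
 * core before the event stream wakes it, e.g.
 *
 *     uint64_t deadline = ml_get_timebase() + ml_cluster_wfe_timeout(cid);
 *     while (!condition && ml_get_timebase() < deadline) {
 *         __builtin_arm_wfe();
 *     }
 *
 * cid and condition are placeholders, not symbols defined in this file.
 */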
2897 
2898 __pure2 bool
2899 ml_addr_in_non_xnu_stack(__unused uintptr_t addr)
2900 {
2901 #if CONFIG_SPTM
2902 	/**
2903 	 * If the address is within one of the SPTM-allocated per-cpu stacks, then
2904 	 * return true.
2905 	 */
2906 	if ((addr >= SPTMArgs->cpu_stack_papt_start) &&
2907 	    (addr < SPTMArgs->cpu_stack_papt_end)) {
2908 		return true;
2909 	}
2910 
2911 	/**
2912 	 * If the address is within one of the TXM thread stacks, then return true.
2913 	 * The SPTM guarantees that these stacks are virtually contiguous.
2914 	 */
2915 	if ((addr >= SPTMArgs->txm_thread_stacks[0]) &&
2916 	    (addr < SPTMArgs->txm_thread_stacks[MAX_CPUS - 1])) {
2917 		return true;
2918 	}
2919 
2920 	return false;
2921 #elif XNU_MONITOR
2922 	return (addr >= (uintptr_t)pmap_stacks_start) && (addr < (uintptr_t)pmap_stacks_end);
2923 #else
2924 	return false;
2925 #endif /* CONFIG_SPTM || XNU_MONITOR */
2926 }
2927 
2928 uint64_t
2929 ml_get_backtrace_pc(struct arm_saved_state *state)
2930 {
2931 	assert((state != NULL) && is_saved_state64(state));
2932 
2933 #if CONFIG_SPTM
2934 	/**
2935 	 * On SPTM-based systems, when a non-XNU domain (e.g., SPTM) is interrupted,
2936 	 * the PC value saved into the state is not the actual PC at the interrupted
2937 	 * point, but a fixed value to a handler that knows how to re-enter the
2938 	 * interrupted domain. The interrupted domain's actual PC value is saved
2939 	 * into x14, so let's return that instead.
2940 	 */
2941 	if (ml_addr_in_non_xnu_stack(get_saved_state_fp(state))) {
2942 		return saved_state64(state)->x[14];
2943 	}
2944 #endif /* CONFIG_SPTM */
2945 
2946 	return get_saved_state_pc(state);
2947 }
2948 
2949 
2950 bool
2951 ml_paddr_is_exclaves_owned(vm_offset_t paddr)
2952 {
2953 #if CONFIG_SPTM
2954 	const sptm_frame_type_t type = sptm_get_frame_type(paddr);
2955 	return type == SK_DEFAULT || type == SK_IO;   // SK_SHARED_R[OW] are not exclusively exclaves frames
2956 #else
2957 	#pragma unused(paddr)
2958 	return false;
2959 #endif /* CONFIG_SPTM */
2960 }
2961 
2962 /**
2963  * Panic because an ARM saved-state accessor expected user saved-state but was
2964  * passed non-user saved-state.
2965  *
2966  * @param ss invalid saved-state (CPSR.M != EL0)
2967  */
2968 void
2969 ml_panic_on_invalid_old_cpsr(const arm_saved_state_t *ss)
2970 {
2971 	panic("invalid CPSR in user saved-state %p", ss);
2972 }
2973 
2974 /**
2975  * Panic because an ARM saved-state accessor was passed user saved-state and
2976  * asked to assign a non-user CPSR.
2977  *
2978  * @param ss original EL0 saved-state
2979  * @param cpsr invalid new CPSR value (CPSR.M != EL0)
2980  */
2981 void
2982 ml_panic_on_invalid_new_cpsr(const arm_saved_state_t *ss, uint32_t cpsr)
2983 {
2984 	panic("attempt to set non-user CPSR %#010x on user saved-state %p", cpsr, ss);
2985 }
2986 
2987 /**
2988  * Explicitly preallocates a floating point save area.
2989  * This is a noop on ARM because preallocation isn't required at this time.
2990  */
2991 void
2992 ml_fp_save_area_prealloc(void)
2993 {
2994 }
2995