1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/machine_machdep.h>
30 #include <arm64/proc_reg.h>
31 #include <arm/machine_cpu.h>
32 #include <arm/cpu_internal.h>
33 #include <arm/cpuid.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <kern/misc_protos.h>
46 #include <prng/random.h>
47 #include <kern/startup.h>
48 #include <kern/thread.h>
49 #include <kern/timer_queue.h>
50 #include <mach/machine.h>
51 #include <machine/atomic.h>
52 #include <machine/config.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_shared_region.h>
56 #include <vm/vm_map.h>
57 #include <sys/codesign.h>
58 #include <sys/kdebug.h>
59 #include <kern/coalition.h>
60 #include <pexpert/device_tree.h>
61
62 #include <IOKit/IOPlatformExpert.h>
63 #if HIBERNATION
64 #include <IOKit/IOHibernatePrivate.h>
65 #endif /* HIBERNATION */
66
67 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
68 #include <arm64/amcc_rorgn.h>
69 #endif
70
71
72
73 #include <libkern/section_keywords.h>
74
75 /**
76 * On supported hardware, debuggable builds make the HID bits read-only
77 * without locking them. This lets people manually modify HID bits while
78 * debugging, since they can use a debugging tool to first reset the HID
79 * bits back to read/write. However it will still catch xnu changes that
80 * accidentally write to HID bits after they've been made read-only.
81 */
/* When true, HID SPR lockdown is skipped (debuggable builds only; see comment above). */
SECURITY_READ_ONLY_LATE(bool) skip_spr_lockdown_glb = 0;

/*
 * On some SoCs, PIO lockdown is applied in assembly in early boot by
 * secondary CPUs.
 * Since the cluster_pio_ro_ctl value is dynamic, it is stored here by the
 * primary CPU so that it doesn't have to be computed each time by the
 * startup code.
 */
SECURITY_READ_ONLY_LATE(uint64_t) cluster_pio_ro_ctl_mask_glb = 0;
92
93 #if KPC
94 #include <kern/kpc.h>
95 #endif
96
/* Extract the physical CPU (AFF0) and cluster (AFF1) fields from an MPIDR_EL1 value. */
#define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)

#if HAS_CLUSTER
/* NOTE(review): presumably set once cluster bring-up completes; the setter is not visible in this chunk. */
static uint8_t cluster_initialized = 0;
#endif

/* Spin-lock acquisition timeout, in timebase ticks; tunable via boot-args (see ml_init_lock_timeout). */
MACHINE_TIMEOUT_DEV_WRITEABLE(LockTimeOut, "lock", 6e6 /* 0.25s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
machine_timeout_t LockTimeOutUsec; // computed in ml_init_lock_timeout

MACHINE_TIMEOUT_DEV_WRITEABLE(TLockTimeOut, "ticket-lock", 3e6 /* 0.125s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

MACHINE_TIMEOUT_DEV_WRITEABLE(MutexSpin, "mutex-spin", 240 /* 10us */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

/* Low/high bounds for adaptive mutex spinning; seeded from MutexSpin in ml_init_lock_timeout(). */
uint64_t low_MutexSpin;
int64_t high_MutexSpin;



/* Longest interval a perf-controller WFE hint is honored (abstime; set in ml_init_lock_timeout). */
static uint64_t ml_wfe_hint_max_interval;
#define MAX_WFE_HINT_INTERVAL_US (500ULL)
121
122 extern vm_offset_t segLOWEST;
123 extern vm_offset_t segLOWESTTEXT;
124 extern vm_offset_t segLASTB;
125 extern unsigned long segSizeLAST;
126
127 /* ARM64 specific bounds; used to test for presence in the kernelcache. */
128 extern vm_offset_t vm_kernelcache_base;
129 extern vm_offset_t vm_kernelcache_top;
130
131 /* Location of the physmap / physical aperture */
132 extern uint64_t physmap_base;
133
134 extern vm_offset_t arm_vm_kernelcache_phys_start;
135 extern vm_offset_t arm_vm_kernelcache_phys_end;
136
#if defined(HAS_IPI)
/* Nonzero selects the ACC Fast IPI mechanism; overridable via "fastipi" boot-arg on DEV/DEBUG builds. */
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
/* Deferred-IPI countdown; clamped and written to hardware by ml_cpu_signal_deferred_adjust_timer(). */
static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
    kDeferredIPITimerDefault);
#endif /* defined(HAS_IPI) */
143
/* Assembly routine that enters the idle loop (see machine_idle()). */
thread_t Idle_context(void);

/* False when cpus=/cpumask= boot-args conflict or disable the boot CPU; triggers a panic once serial is up. */
SECURITY_READ_ONLY_LATE(bool) cpu_config_correct = true;

/* Static backing storage for the CPU topology parsed from the device tree in ml_parse_cpu_topology(). */
SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
	.version = CPU_TOPOLOGY_VERSION,
	.cpus = topology_cpu_array,
	.clusters = topology_cluster_array,
};

/* Active-CPU count per cluster type; maintained by ml_cpu_{up,down}_update_counts(). */
_Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES];
157
158 /**
159 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
160 * entries of an arbitrary data type. This is intended for use by specialized consumers
161 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
162 * as follows:
163 * hypothetical_array[cluster_offsets[AFF1] + AFF0]
164 * Most consumers should instead use general-purpose facilities such as PERCPU or
165 * ml_get_cpu_number().
166 */
SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];

/* NOTE(review): event index, defaults to "unset" (UINT32_MAX); consumers are not visible in this chunk. */
SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;

/* Set to 1 by machine_lockdown() once kernel lockdown has completed. */
extern uint32_t lockdown_done;
172
173 /**
174 * Represents regions of virtual address space that should be reserved
175 * (pre-mapped) in each user address space.
176 */
static const struct vm_reserved_region vm_reserved_regions[] = {
	/* Keep userspace from mapping into the GPU carveout VA range. */
	{
		.vmrr_name = "GPU Carveout",
		.vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
		.vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
	},
	/*
	 * Reserve the virtual memory space representing the commpage nesting region
	 * to prevent user processes from allocating memory within it. The actual
	 * page table entries for the commpage are inserted by vm_commpage_enter().
	 * This vm_map_enter() just prevents userspace from allocating/deallocating
	 * anything within the entire commpage nested region.
	 */
	{
		.vmrr_name = "commpage nesting",
		.vmrr_addr = _COMM_PAGE64_NESTING_START,
		.vmrr_size = _COMM_PAGE64_NESTING_SIZE
	}
};
196
197 uint32_t get_arm_cpu_version(void);
198
#if defined(HAS_IPI)
/*
 * Issue an ACC Fast IPI of the given type (immediate/deferred/retract) to the
 * CPU identified by its MPIDR value.
 *
 * @param cpu_mpidr  MPIDR_EL1 value (AFF0 = CPU, AFF1 = cluster) of the target.
 * @param type       ARM64_REG_IPI_RR_TYPE_* request encoding.
 */
static inline void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
#if HAS_CLUSTER
	uint64_t local_mpidr;
	/* NOTE: this logic expects that we are called in a non-preemptible
	 * context, or at least one in which the calling thread is bound
	 * to a single CPU.  Otherwise we may migrate between choosing which
	 * IPI mechanism to use and issuing the IPI. */
	MRS(local_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
		/* Target is in our own cluster: use the local IPI request register. */
		uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_0", x);
	} else {
		/* Cross-cluster: encode the target cluster ID into the global IPI request register. */
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_1", x);
	}
#else
	uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
	MSR("S3_5_C15_C0_1", x);
#endif
	/* The recommended local/global IPI sequence is:
	 * DSB <sys> (This ensures visibility of e.g. older stores to the
	 * pending CPU signals bit vector in DRAM prior to IPI reception,
	 * and is present in cpu_signal_internal())
	 * MSR S3_5_C15_C0_1, Xt
	 * ISB
	 */
	__builtin_arm_isb(ISB_SY);
}
#endif
232
/*
 * Send an immediate fast IPI to the CPU with the given MPIDR value.
 * Panics on platforms without ACC Fast IPI hardware.
 */
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
#else
	panic("Platform does not support ACC Fast IPI");
#endif
}
245
/*
 * Reprogram the deferred-IPI countdown timer.
 *
 * @param nanosecs  requested countdown in nanoseconds; converted to timebase
 *                  ticks and clamped to the 16-bit hardware field.
 *
 * Also writes the clamped value back (re-converted to ns) into
 * deferred_ipi_timer_ns so readers observe the effective setting.
 */
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
#if defined(HAS_IPI)
	/* adjust IPI_CR timer countdown value for deferred IPI
	 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
	 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
	 *
	 * global register, should only require a single write to update all
	 * CPU cores: from Skye ACC user spec section 5.7.3.3
	 *
	 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
	 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
	 */
	uint64_t abstime;

	nanoseconds_to_absolutetime(nanosecs, &abstime);

	abstime = MIN(abstime, 0xFFFF);

	/* update deferred_ipi_timer_ns with the new clamped value */
	absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);

	MSR("S3_5_C15_C3_1", abstime);
#else
	(void)nanosecs;
	panic("Platform does not support ACC Fast IPI");
#endif
}
278
279 uint64_t
ml_cpu_signal_deferred_get_timer()280 ml_cpu_signal_deferred_get_timer()
281 {
282 #if defined(HAS_IPI)
283 return deferred_ipi_timer_ns;
284 #else
285 return 0;
286 #endif
287 }
288
/*
 * Send a deferred fast IPI (delivered after the IPI_CR countdown expires)
 * to the CPU with the given MPIDR value.  Panics without ACC Fast IPI hardware.
 */
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
#else
	panic("Platform does not support ACC Fast IPI deferral");
#endif
}
301
/*
 * Retract a previously-issued deferred fast IPI for the CPU with the given
 * MPIDR value.  Panics without ACC Fast IPI hardware.
 */
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
#else
	panic("Platform does not support ACC Fast IPI retraction");
#endif
}
314
extern uint32_t idle_proximate_io_wfe_unmasked;

#define CPUPM_IDLE_WFE 0x5310300
/*
 * If the perf controller currently recommends WFE-based idling for this
 * cluster, poll in WFE (IRQs unmasked) until the recommendation window
 * expires or an interrupt arrives.
 *
 * @return value of wfe_to_deadline_or_interrupt() — whether an interrupt
 *         became pending during the WFE window; false when no
 *         recommendation was active.
 */
static bool
wfe_process_recommendation(void)
{
	bool ipending = false;
	if (__probable(idle_proximate_io_wfe_unmasked == 1)) {
		/* Check for an active perf. controller generated
		 * WFE recommendation for this cluster.
		 */
		cpu_data_t *cdp = getCpuDatap();
		uint32_t cid = cdp->cpu_cluster_id;
		uint64_t wfe_ttd = 0;
		uint64_t wfe_deadline = 0;

		if ((wfe_ttd = ml_cluster_wfe_timeout(cid)) != 0) {
			wfe_deadline = mach_absolute_time() + wfe_ttd;
		}

		if (wfe_deadline != 0) {
			/* Poll issuing event-bounded WFEs until an interrupt
			 * arrives or the WFE recommendation expires
			 */
#if DEVELOPMENT || DEBUG
			/* Snapshot the WFE count so the END tracepoint can report the delta. */
			uint64_t wc = cdp->wfe_count;
			KDBG(CPUPM_IDLE_WFE | DBG_FUNC_START, ipending, wc, wfe_ttd, cdp->cpu_stat.irq_ex_cnt_wake);
#endif
			/* Issue WFE until the recommendation expires,
			 * with IRQs unmasked.
			 */
			ipending = wfe_to_deadline_or_interrupt(cid, wfe_deadline, cdp, true, true);
#if DEVELOPMENT || DEBUG
			KDBG(CPUPM_IDLE_WFE | DBG_FUNC_END, ipending, cdp->wfe_count - wc, wfe_deadline, cdp->cpu_stat.irq_ex_cnt_wake);
#endif
		}
	}
	return ipending;
}
354
/*
 * Machine-dependent idle: prefer a WFE polling window when the perf
 * controller recommends one, otherwise enter WFI via Idle_context().
 */
void
machine_idle(void)
{
	/* Interrupts are expected to be masked on entry or re-entry via
	 * Idle_load_context()
	 */
	assert((__builtin_arm_rsr("DAIF") & (DAIF_IRQF | DAIF_FIQF)) == (DAIF_IRQF | DAIF_FIQF));
	/* Check for, and act on, a WFE recommendation.
	 * Bypasses context spill/fill for a minor perf. increment.
	 * May unmask and restore IRQ+FIQ mask.
	 */
	if (wfe_process_recommendation() == false) {
		/* If WFE recommendation absent, or WFE deadline
		 * arrived with no interrupt pending/processed,
		 * fall back to WFI.
		 */
		Idle_context();
	}
	/* Re-enable IRQ and FIQ before returning. */
	__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
}
375
/*
 * Full-system data synchronization barrier: all prior memory accesses
 * complete before any subsequent ones begin.
 */
void
OSSynchronizeIO(void)
{
	__builtin_arm_dsb(DSB_SY);
}
381
/* Read ACTLR_EL1 (EL1 auxiliary control register). */
uint64_t
get_aux_control(void)
{
	uint64_t value;

	MRS(value, "ACTLR_EL1");
	return value;
}
390
/* Read SCTLR_EL1 (EL1 system control register, which contains the MMU enable bit). */
uint64_t
get_mmu_control(void)
{
	uint64_t value;

	MRS(value, "SCTLR_EL1");
	return value;
}
399
/* Read TCR_EL1 (EL1 translation control register). */
uint64_t
get_tcr(void)
{
	uint64_t value;

	MRS(value, "TCR_EL1");
	return value;
}
408
409 boolean_t
ml_get_interrupts_enabled(void)410 ml_get_interrupts_enabled(void)
411 {
412 uint64_t value;
413
414 MRS(value, "DAIF");
415 if (value & DAIF_IRQF) {
416 return FALSE;
417 }
418 return TRUE;
419 }
420
/* Read TTBR0_EL1 (translation table base register for the lower VA range). */
pmap_paddr_t
get_mmu_ttb(void)
{
	pmap_paddr_t value;

	MRS(value, "TTBR0_EL1");
	return value;
}
429
430 uint32_t
get_arm_cpu_version(void)431 get_arm_cpu_version(void)
432 {
433 uint32_t value = machine_read_midr();
434
435 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
436 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
437 }
438
439 bool
ml_feature_supported(uint32_t feature_bit)440 ml_feature_supported(uint32_t feature_bit)
441 {
442 uint64_t aidr_el1_value = 0;
443
444 MRS(aidr_el1_value, "AIDR_EL1");
445
446
447 return aidr_el1_value & feature_bit;
448 }
449
450 /*
451 * user_cont_hwclock_allowed()
452 *
453 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
454 * as a continuous time source (e.g. from mach_continuous_time)
455 */
/*
 * TRUE only on hardware where the virtual timebase counts across sleep,
 * making it usable as a continuous time source from EL0.
 */
boolean_t
user_cont_hwclock_allowed(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	return TRUE;
#else
	return FALSE;
#endif
}
465
466 /*
467 * user_timebase_type()
468 *
469 * Indicates type of EL0 virtual timebase read (CNTVCT_EL0).
470 *
471 * USER_TIMEBASE_NONE: EL0 has no access to timebase register
472 * USER_TIMEBASE_SPEC: EL0 has access to speculative timebase reads (CNTVCT_EL0)
473 * USER_TIMEBASE_NOSPEC: EL0 has access to non speculative timebase reads (CNTVCTSS_EL0)
474 *
475 */
476
/*
 * Select the EL0 timebase-read flavor for this hardware; see the
 * USER_TIMEBASE_* descriptions in the comment above.
 */
uint8_t
user_timebase_type(void)
{
#if HAS_ACNTVCT
	/* Apple-specific non-speculative counter read. */
	return USER_TIMEBASE_NOSPEC_APPLE;
#elif __ARM_ARCH_8_6__
	/* ARMv8.6 provides CNTVCTSS_EL0 (self-synchronizing, non-speculative). */
	return USER_TIMEBASE_NOSPEC;
#else
	return USER_TIMEBASE_SPEC;
#endif
}
488
/*
 * Machine-dependent startup: consume machine-level boot-args, configure the
 * machine, and hand off to the machine-independent bootstrap.  Never returns.
 */
void
machine_startup(__unused boot_args * args)
{
#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
	/* Allow the "fastipi" boot-arg to override the fast-IPI default on debug builds. */
	if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
		gFastIPI = 1;
	}
#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/


	machine_conf();


	/*
	 * Kick off the kernel bootstrap.
	 */
	kernel_bootstrap();
	/* NOTREACHED */
}
508
/* Callback invoked from machine_lockdown(); registered at most once via set_invalidate_hmac_function(). */
typedef void (*invalidate_fn_t)(void);

static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;

void set_invalidate_hmac_function(invalidate_fn_t fn);
514
515 void
set_invalidate_hmac_function(invalidate_fn_t fn)516 set_invalidate_hmac_function(invalidate_fn_t fn)
517 {
518 if (NULL != invalidate_hmac_function) {
519 panic("Invalidate HMAC function already set");
520 }
521
522 invalidate_hmac_function = fn;
523 }
524
/*
 * Finalize kernel memory protections ("lockdown"): seal kernel page
 * protections, engage the platform integrity mechanism (Watchtower monitor,
 * KTRR/CTRR, or PPL), invoke the registered HMAC-invalidation callback, and
 * publish completion via lockdown_done.
 */
void
machine_lockdown(void)
{


	arm_vm_prot_finalize(PE_state.bootArgs);

#if CONFIG_KERNEL_INTEGRITY
#if KERNEL_INTEGRITY_WT
	/* Watchtower
	 *
	 * Notify the monitor about the completion of early kernel bootstrap.
	 * From this point forward it will enforce the integrity of kernel text,
	 * rodata and page tables.
	 */

#ifdef MONITOR
	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
#endif
#endif /* KERNEL_INTEGRITY_WT */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	/* KTRR
	 *
	 * Lock physical KTRR region. KTRR region is read-only. Memory outside
	 * the region is not executable at EL1.
	 */

	rorgn_lockdown();
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */

#if XNU_MONITOR
	pmap_lockdown_ppl();
#endif

#endif /* CONFIG_KERNEL_INTEGRITY */


	/* Run the (optional) callback registered via set_invalidate_hmac_function(). */
	if (NULL != invalidate_hmac_function) {
		invalidate_hmac_function();
	}

	lockdown_done = 1;
}
569
570
/* Return the platform boot-args string; the supplied buffer is unused on this architecture. */
char *
machine_boot_info(
	__unused char *buf,
	__unused vm_size_t size)
{
	return PE_boot_args();
}
578
/* Per-CPU initialization for secondary ("slave") processors coming online. */
void
slave_machine_init(__unused void *param)
{
	cpu_machine_init();     /* Initialize the processor */
	clock_init();           /* Init the clock */
}
585
586 /*
587 * Routine: machine_processor_shutdown
588 * Function:
589 */
/*
 * Switch to the shutdown context and run doshutdown() for the given
 * processor; returns the thread we switched away from.
 */
thread_t
machine_processor_shutdown(
	__unused thread_t thread,
	void (*doshutdown)(processor_t),
	processor_t processor)
{
	return Shutdown_context(doshutdown, processor);
}
598
599 /*
600 * Routine: ml_init_lock_timeout
601 * Function:
602 */
static void __startup_func
ml_init_lock_timeout(void)
{
	/*
	 * This function is called after STARTUP_SUB_TIMEOUTS
	 * initialization, so using the "legacy" boot-args here overrides
	 * the ml-timeout-... configuration. (Given that these boot-args
	 * here are usually explicitly specified, this makes sense by
	 * overriding ml-timeout-..., which may come from the device tree.
	 */

	uint64_t lto_timeout_ns;
	uint64_t lto_abstime;
	uint32_t slto;

	/* "slto_us" (microseconds) overrides the spin-lock timeout. */
	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		lto_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(lto_timeout_ns, &lto_abstime);
		os_atomic_store(&LockTimeOut, lto_abstime, relaxed);
	} else {
		/* No override: derive the ns value from the configured abstime timeout. */
		lto_abstime = os_atomic_load(&LockTimeOut, relaxed);
		absolutetime_to_nanoseconds(lto_abstime, &lto_timeout_ns);
	}

	/* Keep the microsecond mirror in sync for consumers of LockTimeOutUsec. */
	os_atomic_store(&LockTimeOutUsec, lto_timeout_ns / NSEC_PER_USEC, relaxed);

	/* "tlto_us" overrides the ticket-lock timeout; else default to half the spin-lock timeout. */
	if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &lto_abstime);
		os_atomic_store(&TLockTimeOut, lto_abstime, relaxed);
	} else if (lto_abstime != 0) {
		os_atomic_store(&TLockTimeOut, lto_abstime >> 1, relaxed);
	} // else take default from MACHINE_TIMEOUT.

	uint64_t mtxspin;
	uint64_t mtx_abstime;
	/* "mtxspin" (microseconds) overrides the mutex spin window, capped at 1/16 s. */
	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &mtx_abstime);
		os_atomic_store(&MutexSpin, mtx_abstime, relaxed);
	} else {
		mtx_abstime = os_atomic_load(&MutexSpin, relaxed);
	}

	low_MutexSpin = os_atomic_load(&MutexSpin, relaxed);
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 *
	 * NOTE: active spinning is disabled in arm. It can be activated
	 * by setting high_MutexSpin through the sysctl.
	 */
	high_MutexSpin = low_MutexSpin;

	/* "max_wfe_us" caps how long a WFE hint is honored (default MAX_WFE_HINT_INTERVAL_US). */
	uint64_t maxwfeus = MAX_WFE_HINT_INTERVAL_US;
	PE_parse_boot_argn("max_wfe_us", &maxwfeus, sizeof(maxwfeus));
	nanoseconds_to_absolutetime(maxwfeus * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
}
STARTUP(TIMEOUTS, STARTUP_RANK_MIDDLE, ml_init_lock_timeout);
663
664
665 /*
666 * This is called when all of the ml_processor_info_t structures have been
667 * initialized and all the processors have been started through processor_start().
668 *
669 * Required by the scheduler subsystem.
670 */
/* Forward the "all processors started" notification to the scheduler, if it cares. */
void
ml_cpu_init_completed(void)
{
	if (SCHED(cpu_init_completed) != NULL) {
		SCHED(cpu_init_completed)();
	}
}
678
679 /*
680 * These are called from the machine-independent routine cpu_up()
681 * to perform machine-dependent info updates.
682 *
683 * The update to CPU counts needs to be separate from other actions
684 * because we don't update the counts when CLPC causes temporary
685 * cluster powerdown events, as these must be transparent to the user.
686 */
/* No machine-dependent work is needed on this architecture when a CPU comes up. */
void
ml_cpu_up(void)
{
}
691
692 void
ml_cpu_up_update_counts(int cpu_id)693 ml_cpu_up_update_counts(int cpu_id)
694 {
695 ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
696
697 os_atomic_inc(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
698
699 os_atomic_inc(&machine_info.physical_cpu, relaxed);
700 os_atomic_inc(&machine_info.logical_cpu, relaxed);
701 }
702
703 /*
704 * These are called from the machine-independent routine cpu_down()
705 * to perform machine-dependent info updates.
706 *
707 * The update to CPU counts needs to be separate from other actions
708 * because we don't update the counts when CLPC causes temporary
709 * cluster powerdown events, as these must be transparent to the user.
710 */
/*
 * Machine-dependent teardown when this CPU is going offline: mark it not
 * running, migrate its timer queue to the boot CPU, and drain pending
 * cross-CPU signals.
 */
void
ml_cpu_down(void)
{
	/*
	 * If we want to deal with outstanding IPIs, we need to
	 * do relatively early in the processor_doshutdown path,
	 * as we pend decrementer interrupts using the IPI
	 * mechanism if we cannot immediately service them (if
	 * IRQ is masked).  Do so now.
	 *
	 * We aren't on the interrupt stack here; would it make
	 * more sense to disable signaling and then enable
	 * interrupts?  It might be a bit cleaner.
	 */
	cpu_data_t *cpu_data_ptr = getCpuDatap();
	cpu_data_ptr->cpu_running = FALSE;

	if (cpu_data_ptr != &BootCpuData) {
		/*
		 * Move all of this cpu's timers to the master/boot cpu,
		 * and poke it in case there's a sooner deadline for it to schedule.
		 */
		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
		kern_return_t rv = cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, &ml_cpu_down);
		if (rv != KERN_SUCCESS) {
			panic("ml_cpu_down: IPI failure %d", rv);
		}
	}

	/* Process any signals still pending for this CPU before it stops running. */
	cpu_signal_handler_internal(TRUE);
}
742 void
ml_cpu_down_update_counts(int cpu_id)743 ml_cpu_down_update_counts(int cpu_id)
744 {
745 ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
746
747 os_atomic_dec(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
748
749 os_atomic_dec(&machine_info.physical_cpu, relaxed);
750 os_atomic_dec(&machine_info.logical_cpu, relaxed);
751 }
752
753
/* Return the machine's memory size as recorded in machine_info. */
unsigned int
ml_get_machine_mem(void)
{
	return machine_info.memory_size;
}
759
760 __attribute__((noreturn))
761 void
halt_all_cpus(boolean_t reboot)762 halt_all_cpus(boolean_t reboot)
763 {
764 if (reboot) {
765 printf("MACH Reboot\n");
766 PEHaltRestart(kPERestartCPU);
767 } else {
768 printf("CPU halted\n");
769 PEHaltRestart(kPEHaltCPU);
770 }
771 while (1) {
772 ;
773 }
774 }
775
/* Halt the machine without rebooting.  Never returns. */
__attribute__((noreturn))
void
halt_cpu(void)
{
	halt_all_cpus(FALSE);
}
782
783 /*
784 * Routine: machine_signal_idle
785 * Function:
786 */
/* Send a no-op IPI to wake the given processor out of idle, and trace it. */
void
machine_signal_idle(
	processor_t processor)
{
	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}
794
/* Send a deferred (timer-delayed) wake signal to the given processor, and trace it. */
void
machine_signal_idle_deferred(
	processor_t processor)
{
	cpu_signal_deferred(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}
802
/* Cancel a previously-issued deferred wake signal for the given processor, and trace it. */
void
machine_signal_idle_cancel(
	processor_t processor)
{
	cpu_signal_cancel(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}
810
811 /*
812 * Routine: ml_install_interrupt_handler
813 * Function: Initialize Interrupt Handler
814 */
815 void
ml_install_interrupt_handler(void * nub,int source,void * target,IOInterruptHandler handler,void * refCon)816 ml_install_interrupt_handler(
817 void *nub,
818 int source,
819 void *target,
820 IOInterruptHandler handler,
821 void *refCon)
822 {
823 cpu_data_t *cpu_data_ptr;
824 boolean_t current_state;
825
826 current_state = ml_set_interrupts_enabled(FALSE);
827 cpu_data_ptr = getCpuDatap();
828
829 cpu_data_ptr->interrupt_nub = nub;
830 cpu_data_ptr->interrupt_source = source;
831 cpu_data_ptr->interrupt_target = target;
832 cpu_data_ptr->interrupt_handler = handler;
833 cpu_data_ptr->interrupt_refCon = refCon;
834
835 (void) ml_set_interrupts_enabled(current_state);
836 }
837
838 /*
839 * Routine: ml_init_interrupt
840 * Function: Initialize Interrupts
841 */
/* Per-CPU interrupt initialization; programs the global deferred-IPI timer once, on the cluster master. */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt will get called once for each CPU, but this is redundant
	 * because there is only one global copy of the register for skye. do it only
	 * on the bootstrap cpu
	 */
	if (getCpuDatap()->cluster_master) {
		ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
	}
#endif
}
856
857 /*
858 * Routine: ml_init_timebase
859 * Function: register and setup Timebase, Decremeter services
860 */
861 void
ml_init_timebase(void * args,tbd_ops_t tbd_funcs,vm_offset_t int_address,vm_offset_t int_value __unused)862 ml_init_timebase(
863 void *args,
864 tbd_ops_t tbd_funcs,
865 vm_offset_t int_address,
866 vm_offset_t int_value __unused)
867 {
868 cpu_data_t *cpu_data_ptr;
869
870 cpu_data_ptr = (cpu_data_t *)args;
871
872 if ((cpu_data_ptr == &BootCpuData)
873 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
874 rtclock_timebase_func = *tbd_funcs;
875 rtclock_timebase_addr = int_address;
876 }
877 }
878
879 #define ML_READPROP_MANDATORY UINT64_MAX
880
881 static uint64_t
ml_readprop(const DTEntry entry,const char * propertyName,uint64_t default_value)882 ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
883 {
884 void const *prop;
885 unsigned int propSize;
886
887 if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
888 if (propSize == sizeof(uint8_t)) {
889 return *((uint8_t const *)prop);
890 } else if (propSize == sizeof(uint16_t)) {
891 return *((uint16_t const *)prop);
892 } else if (propSize == sizeof(uint32_t)) {
893 return *((uint32_t const *)prop);
894 } else if (propSize == sizeof(uint64_t)) {
895 return *((uint64_t const *)prop);
896 } else {
897 panic("CPU property '%s' has bad size %u", propertyName, propSize);
898 }
899 } else {
900 if (default_value == ML_READPROP_MANDATORY) {
901 panic("Missing mandatory property '%s'", propertyName);
902 }
903 return default_value;
904 }
905 }
906
907 static boolean_t
ml_read_reg_range(const DTEntry entry,const char * propertyName,uint64_t * pa_ptr,uint64_t * len_ptr)908 ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
909 {
910 uint64_t const *prop;
911 unsigned int propSize;
912
913 if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
914 return FALSE;
915 }
916
917 if (propSize != sizeof(uint64_t) * 2) {
918 panic("Wrong property size for %s", propertyName);
919 }
920
921 *pa_ptr = prop[0];
922 *len_ptr = prop[1];
923 return TRUE;
924 }
925
926 static boolean_t
ml_is_boot_cpu(const DTEntry entry)927 ml_is_boot_cpu(const DTEntry entry)
928 {
929 void const *prop;
930 unsigned int propSize;
931
932 if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
933 panic("unable to retrieve state for cpu");
934 }
935
936 if (strncmp((char const *)prop, "running", propSize) == 0) {
937 return TRUE;
938 } else {
939 return FALSE;
940 }
941 }
942
/*
 * Read the SoC revision from the arm-io node's "chip-revision" property into
 * *rev; falls back to CPU_VERSION_UNKNOWN when the node is absent.  No-op on
 * non-Apple ARM64 families (rev is left untouched).
 */
static void
ml_read_chip_revision(unsigned int *rev __unused)
{
	// The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
#ifdef APPLE_ARM64_ARCH_FAMILY
	DTEntry entryP;

	if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
		*rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
	} else {
		*rev = CPU_VERSION_UNKNOWN;
	}
#endif
}
957
958 void
ml_parse_cpu_topology(void)959 ml_parse_cpu_topology(void)
960 {
961 DTEntry entry, child __unused;
962 OpaqueDTEntryIterator iter;
963 uint32_t cpu_boot_arg = MAX_CPUS;
964 uint64_t cpumask_boot_arg = ULLONG_MAX;
965 int err;
966
967 int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
968 int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
969 const boolean_t cpus_boot_arg_present = PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
970 const boolean_t cpumask_boot_arg_present = PE_parse_boot_argn("cpumask", &cpumask_boot_arg, sizeof(cpumask_boot_arg));
971
972 // The cpus=N and cpumask=N boot args cannot be used simultaneously. Flag this
973 // so that we trigger a panic later in the boot process, once serial is enabled.
974 if (cpus_boot_arg_present && cpumask_boot_arg_present) {
975 cpu_config_correct = false;
976 }
977
978 err = SecureDTLookupEntry(NULL, "/cpus", &entry);
979 assert(err == kSuccess);
980
981 err = SecureDTInitEntryIterator(entry, &iter);
982 assert(err == kSuccess);
983
984 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
985 cluster_offsets[i] = -1;
986 cluster_phys_to_logical[i] = -1;
987 cluster_max_cpu_phys_id[i] = 0;
988 }
989
990 while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
991 boolean_t is_boot_cpu = ml_is_boot_cpu(child);
992 boolean_t cpu_enabled = cpumask_boot_arg & 1;
993 cpumask_boot_arg >>= 1;
994
995 // Boot CPU disabled in cpumask. Flag this so that we trigger a panic
996 // later in the boot process, once serial is enabled.
997 if (is_boot_cpu && !cpu_enabled) {
998 cpu_config_correct = false;
999 }
1000
1001 // Ignore this CPU if it has been disabled by the cpumask= boot-arg.
1002 if (!is_boot_cpu && !cpu_enabled) {
1003 continue;
1004 }
1005
1006 // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
1007 // been added to the topology struct yet, and we only have one slot left, then skip
1008 // every other non-boot CPU in order to leave room for the boot CPU.
1009 //
1010 // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
1011 // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
1012 if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
1013 continue;
1014 }
1015 if (topology_info.num_cpus >= cpu_boot_arg) {
1016 break;
1017 }
1018
1019 ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
1020
1021 cpu->cpu_id = topology_info.num_cpus++;
1022 assert(cpu->cpu_id < MAX_CPUS);
1023 topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
1024
1025 cpu->die_id = 0;
1026 topology_info.max_die_id = 0;
1027
1028 cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
1029
1030 cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
1031 cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
1032 cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
1033 cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
1034 cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
1035
1036 ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
1037 ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
1038 ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
1039 cpu->cluster_type = CLUSTER_TYPE_SMP;
1040
1041 int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
1042 if (cluster_type == 'E') {
1043 cpu->cluster_type = CLUSTER_TYPE_E;
1044 } else if (cluster_type == 'P') {
1045 cpu->cluster_type = CLUSTER_TYPE_P;
1046 }
1047
1048 topology_info.cluster_type_num_cpus[cpu->cluster_type]++;
1049
1050 /*
1051 * Since we want to keep a linear cluster ID space, we cannot just rely
1052 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
1053 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
1054 */
1055 #if HAS_CLUSTER
1056 uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
1057 #else
1058 uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
1059 #endif
1060 assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
1061 cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
1062 topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
1063
1064 assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
1065
1066 ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
1067 if (cluster->num_cpus == 0) {
1068 assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
1069
1070 topology_info.num_clusters++;
1071 topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
1072 topology_info.cluster_types |= (1 << cpu->cluster_type);
1073
1074 cluster->cluster_id = cpu->cluster_id;
1075 cluster->cluster_type = cpu->cluster_type;
1076 cluster->first_cpu_id = cpu->cpu_id;
1077 assert(cluster_phys_to_logical[phys_cluster_id] == -1);
1078 cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
1079
1080 topology_info.cluster_type_num_clusters[cluster->cluster_type]++;
1081
1082 // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
1083 // If we wind up with a bunch of these, we might want to create separate per-cluster
1084 // EDT nodes and have the CPU nodes reference them through a phandle.
1085 ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
1086 ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
1087 }
1088
1089 #if HAS_CLUSTER
1090 if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
1091 cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
1092 }
1093 #endif
1094
1095 cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
1096 cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
1097
1098 cluster->num_cpus++;
1099 cluster->cpu_mask |= 1ULL << cpu->cpu_id;
1100
1101 if (is_boot_cpu) {
1102 assert(topology_info.boot_cpu == NULL);
1103 topology_info.boot_cpu = cpu;
1104 topology_info.boot_cluster = cluster;
1105 }
1106
1107 }
1108
1109 #if HAS_CLUSTER
1110 /*
1111 * Build the cluster offset array, ensuring that the region reserved
1112 * for each physical cluster contains enough entries to be indexed
1113 * by the maximum physical CPU ID (AFF0) within the cluster.
1114 */
1115 unsigned int cur_cluster_offset = 0;
1116 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
1117 if (cluster_phys_to_logical[i] != -1) {
1118 cluster_offsets[i] = cur_cluster_offset;
1119 cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
1120 }
1121 }
1122 assert(cur_cluster_offset <= MAX_CPUS);
1123 #else
1124 /*
1125 * For H10, there are really 2 physical clusters, but they are not separated
1126 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
1127 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
1128 * treat H10 and earlier devices as though they contain a single cluster.
1129 */
1130 cluster_offsets[0] = 0;
1131 #endif
1132 assert(topology_info.boot_cpu != NULL);
1133 ml_read_chip_revision(&topology_info.chip_revision);
1134
1135 /*
1136 * Set TPIDR_EL0 to indicate the correct cpu number & cluster id,
1137 * as we may not be booting from cpu 0. Userspace will consume
1138 * the current CPU number through this register. For non-boot
1139 * cores, this is done in start.s (start_cpu) using the per-cpu
1140 * data object.
1141 */
1142 ml_topology_cpu_t *boot_cpu = topology_info.boot_cpu;
1143 uint64_t tpidr_el0 = ((boot_cpu->cpu_id << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
1144 ((boot_cpu->cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
1145 assert(((tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == boot_cpu->cpu_id);
1146 assert(((tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == boot_cpu->cluster_id);
1147 __builtin_arm_wsr64("TPIDR_EL0", tpidr_el0);
1148
1149 __builtin_arm_wsr64("TPIDRRO_EL0", 0);
1150 }
1151
1152 const ml_topology_info_t *
ml_get_topology_info(void)1153 ml_get_topology_info(void)
1154 {
1155 return &topology_info;
1156 }
1157
1158 void
ml_map_cpu_pio(void)1159 ml_map_cpu_pio(void)
1160 {
1161 unsigned int i;
1162
1163 for (i = 0; i < topology_info.num_cpus; i++) {
1164 ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1165 if (cpu->cpu_IMPL_pa) {
1166 cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1167 cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1168 }
1169 if (cpu->cpu_UTTDBG_pa) {
1170 cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1171 }
1172 }
1173
1174 for (i = 0; i < topology_info.num_clusters; i++) {
1175 ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1176 if (cluster->acc_IMPL_pa) {
1177 cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1178 }
1179 if (cluster->cpm_IMPL_pa) {
1180 cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1181 }
1182 }
1183 }
1184
1185 unsigned int
ml_get_cpu_count(void)1186 ml_get_cpu_count(void)
1187 {
1188 return topology_info.num_cpus;
1189 }
1190
1191 unsigned int
ml_get_cluster_count(void)1192 ml_get_cluster_count(void)
1193 {
1194 return topology_info.num_clusters;
1195 }
1196
1197 int
ml_get_boot_cpu_number(void)1198 ml_get_boot_cpu_number(void)
1199 {
1200 return topology_info.boot_cpu->cpu_id;
1201 }
1202
1203 cluster_type_t
ml_get_boot_cluster_type(void)1204 ml_get_boot_cluster_type(void)
1205 {
1206 return topology_info.boot_cluster->cluster_type;
1207 }
1208
1209 int
ml_get_cpu_number(uint32_t phys_id)1210 ml_get_cpu_number(uint32_t phys_id)
1211 {
1212 phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1213
1214 for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1215 if (topology_info.cpus[i].phys_id == phys_id) {
1216 return i;
1217 }
1218 }
1219
1220 return -1;
1221 }
1222
1223 int
ml_get_cluster_number(uint32_t phys_id)1224 ml_get_cluster_number(uint32_t phys_id)
1225 {
1226 int cpu_id = ml_get_cpu_number(phys_id);
1227 if (cpu_id < 0) {
1228 return -1;
1229 }
1230
1231 ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1232
1233 return cpu->cluster_id;
1234 }
1235
1236 unsigned int
ml_get_cpu_number_local(void)1237 ml_get_cpu_number_local(void)
1238 {
1239 uint64_t mpidr_el1_value = 0;
1240 unsigned cpu_id;
1241
1242 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
1243 MRS(mpidr_el1_value, "MPIDR_EL1");
1244 cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);
1245
1246 assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());
1247
1248 return cpu_id;
1249 }
1250
1251 int
ml_get_cluster_number_local()1252 ml_get_cluster_number_local()
1253 {
1254 uint64_t mpidr_el1_value = 0;
1255 unsigned cluster_id;
1256
1257 /* We identify the cluster based on the constant bits of MPIDR_EL1. */
1258 MRS(mpidr_el1_value, "MPIDR_EL1");
1259 cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);
1260
1261 assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());
1262
1263 return cluster_id;
1264 }
1265
1266 int
ml_get_max_cpu_number(void)1267 ml_get_max_cpu_number(void)
1268 {
1269 return topology_info.max_cpu_id;
1270 }
1271
1272 int
ml_get_max_cluster_number(void)1273 ml_get_max_cluster_number(void)
1274 {
1275 return topology_info.max_cluster_id;
1276 }
1277
1278 unsigned int
ml_get_first_cpu_id(unsigned int cluster_id)1279 ml_get_first_cpu_id(unsigned int cluster_id)
1280 {
1281 return topology_info.clusters[cluster_id].first_cpu_id;
1282 }
1283
1284 static_assert(MAX_CPUS <= 256, "MAX_CPUS must fit in _COMM_PAGE_CPU_TO_CLUSTER; Increase table size if needed");
1285
1286 void
ml_map_cpus_to_clusters(uint8_t * table)1287 ml_map_cpus_to_clusters(uint8_t *table)
1288 {
1289 for (uint16_t cpu_id = 0; cpu_id < topology_info.num_cpus; cpu_id++) {
1290 *(table + cpu_id) = (uint8_t)(topology_info.cpus[cpu_id].cluster_id);
1291 }
1292 }
1293
1294 /*
1295 * Return the die id of a cluster.
1296 */
1297 unsigned int
ml_get_die_id(unsigned int cluster_id)1298 ml_get_die_id(unsigned int cluster_id)
1299 {
1300 /*
1301 * The current implementation gets the die_id from the
1302 * first CPU of the cluster.
1303 * rdar://80917654 (Add the die_id field to the cluster topology info)
1304 */
1305 unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1306 return topology_info.cpus[first_cpu].die_id;
1307 }
1308
1309 /*
1310 * Return the index of a cluster in its die.
1311 */
1312 unsigned int
ml_get_die_cluster_id(unsigned int cluster_id)1313 ml_get_die_cluster_id(unsigned int cluster_id)
1314 {
1315 /*
1316 * The current implementation gets the die_id from the
1317 * first CPU of the cluster.
1318 * rdar://80917654 (Add the die_id field to the cluster topology info)
1319 */
1320 unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1321 return topology_info.cpus[first_cpu].die_cluster_id;
1322 }
1323
1324 /*
1325 * Return the highest die id of the system.
1326 */
1327 unsigned int
ml_get_max_die_id(void)1328 ml_get_max_die_id(void)
1329 {
1330 return topology_info.max_die_id;
1331 }
1332
/*
 * Routine: ml_lockdown_init
 * Function: Early lockdown preparation. When KTRR/CTRR kernel integrity
 *	protection is configured, stash the read-only region bounds for
 *	later use; otherwise this is a no-op.
 */
void
ml_lockdown_init()
{
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif
}
1340
1341 kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f,void * this)1342 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1343 {
1344 if (!f) {
1345 return KERN_FAILURE;
1346 }
1347
1348 assert(lockdown_done);
1349 f(this); // XXX: f this whole function
1350
1351 return KERN_SUCCESS;
1352 }
1353
1354 static mcache_flush_function mcache_flush_func;
1355 static void* mcache_flush_service;
1356 kern_return_t
ml_mcache_flush_callback_register(mcache_flush_function func,void * service)1357 ml_mcache_flush_callback_register(mcache_flush_function func, void *service)
1358 {
1359 mcache_flush_service = service;
1360 mcache_flush_func = func;
1361
1362 return KERN_SUCCESS;
1363 }
1364
1365 kern_return_t
ml_mcache_flush(void)1366 ml_mcache_flush(void)
1367 {
1368 if (!mcache_flush_func) {
1369 panic("Cannot flush M$ with no flush callback registered");
1370
1371 return KERN_FAILURE;
1372 } else {
1373 return mcache_flush_func(mcache_flush_service);
1374 }
1375 }
1376
1377
1378 extern lck_mtx_t pset_create_lock;
1379
/*
 * Routine: ml_processor_register
 * Function: Register a processor described by the platform expert with the
 *	kernel: set up its cpu_data, attach it to (or create) its processor
 *	set, and hand back the processor object plus the IPI and PMI handler
 *	entry points.
 * Returns: KERN_SUCCESS on registration; KERN_FAILURE if the logical id is
 *	out of range, too many CPUs have registered, or per-CPU setup fails.
 */
kern_return_t
ml_processor_register(ml_processor_info_t *in_processor_info,
    processor_t *processor_out, ipi_handler_t *ipi_handler_out,
    perfmon_interrupt_handler_func *pmi_handler_out)
{
	cpu_data_t *this_cpu_datap;
	processor_set_t pset;
	boolean_t is_boot_cpu;
	static unsigned int reg_cpu_count = 0;

	/* Reject logical ids beyond the device-tree topology. */
	if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
		return KERN_FAILURE;
	}

	/* Refuse registrations once every topology CPU has registered. */
	if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
		return KERN_FAILURE;
	}

	if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
		is_boot_cpu = FALSE;
		this_cpu_datap = cpu_data_alloc(FALSE);
		cpu_data_init(this_cpu_datap);
	} else {
		/* The boot CPU's cpu_data was set up statically in early boot. */
		this_cpu_datap = &BootCpuData;
		is_boot_cpu = TRUE;
	}

	assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());

	this_cpu_datap->cpu_id = in_processor_info->cpu_id;

	if (!is_boot_cpu) {
		this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);

		if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
			goto processor_register_error;
		}
		/* cpu_number must fit in the TPIDR_EL0 CPU-number field. */
		assert((this_cpu_datap->cpu_number & MACHDEP_TPIDR_CPUNUM_MASK) == this_cpu_datap->cpu_number);
	}

	/* Copy platform-supplied callbacks and per-CPU attributes. */
	this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
	this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
	nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
	this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);

	this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
	this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;

	this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
	this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
	this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
	this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;

	this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
	this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
	this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
	this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
	this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
	this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;

	/*
	 * Encode cpu_id, cluster_id to be stored in TPIDR_EL0 (see
	 * cswitch.s:set_thread_registers, start.s:start_cpu) for consumption
	 * by userspace.
	 */
	this_cpu_datap->cpu_tpidr_el0 = ((this_cpu_datap->cpu_number << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
	    ((this_cpu_datap->cpu_cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
	assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == this_cpu_datap->cpu_number);
	assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == this_cpu_datap->cpu_cluster_id);

#if HAS_CLUSTER
	/* First CPU to register from a cluster becomes its cluster master. */
	this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
#else /* HAS_CLUSTER */
	this_cpu_datap->cluster_master = is_boot_cpu;
#endif /* HAS_CLUSTER */
	/* Find this cluster's pset, creating it under the lock if absent. */
	lck_mtx_lock(&pset_create_lock);
	pset = pset_find(in_processor_info->cluster_id, NULL);
	kprintf("[%d]%s>pset_find(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cluster_id, pset ? pset->pset_id : -1);
	if (pset == NULL) {
#if __AMP__
		pset_cluster_type_t pset_cluster_type = this_cpu_datap->cpu_cluster_type == CLUSTER_TYPE_E ? PSET_AMP_E : PSET_AMP_P;
		pset = pset_create(ml_get_boot_cluster_type() == this_cpu_datap->cpu_cluster_type ? &pset_node0 : &pset_node1, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id);
		assert(pset != PROCESSOR_SET_NULL);
		kprintf("[%d]%s>pset_create(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, this_cpu_datap->cpu_cluster_id, pset->pset_id);
#else /* __AMP__ */
		pset_cluster_type_t pset_cluster_type = PSET_SMP;
		pset = pset_create(&pset_node0, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id);
		assert(pset != PROCESSOR_SET_NULL);
#endif /* __AMP__ */
	}
	kprintf("[%d]%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
	lck_mtx_unlock(&pset_create_lock);

	processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
	if (!is_boot_cpu) {
		processor_init(processor, this_cpu_datap->cpu_number, pset);

		if (this_cpu_datap->cpu_l2_access_penalty) {
			/*
			 * Cores that have a non-zero L2 access penalty compared
			 * to the boot processor should be de-prioritized by the
			 * scheduler, so that threads use the cores with better L2
			 * preferentially.
			 */
			processor_set_primary(processor, master_processor);
		}
	}

	*processor_out = processor;
	*ipi_handler_out = cpu_signal_handler;
#if CPMU_AIC_PMI && MONOTONIC
	*pmi_handler_out = mt_cpmu_aic_pmi;
#else
	*pmi_handler_out = NULL;
#endif /* CPMU_AIC_PMI && MONOTONIC */
	if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
		*in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
	}

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto processor_register_error;
	}
#endif /* KPC */


	if (!is_boot_cpu) {
		random_cpu_init(this_cpu_datap->cpu_number);
		// now let next CPU register itself
		OSIncrementAtomic((SInt32*)&real_ncpus);
	}

	return KERN_SUCCESS;

processor_register_error:
	/* Roll back partial registration; only non-boot cpu_data is freed. */
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */
	if (!is_boot_cpu) {
		cpu_data_free(this_cpu_datap);
	}

	return KERN_FAILURE;
}
1524
1525 void
ml_init_arm_debug_interface(void * in_cpu_datap,vm_offset_t virt_address)1526 ml_init_arm_debug_interface(
1527 void * in_cpu_datap,
1528 vm_offset_t virt_address)
1529 {
1530 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1531 do_debugid();
1532 }
1533
/*
 * Routine: init_ast_check
 * Function: Per-processor AST-check initialization. No setup is required
 *	on this platform, so the routine is intentionally empty.
 */
void
init_ast_check(
	__unused processor_t processor)
{
}
1543
1544 /*
1545 * Routine: cause_ast_check
1546 * Function:
1547 */
1548 void
cause_ast_check(processor_t processor)1549 cause_ast_check(
1550 processor_t processor)
1551 {
1552 if (current_processor() != processor) {
1553 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1554 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1555 }
1556 }
1557
1558 extern uint32_t cpu_idle_count;
1559
1560 void
ml_get_power_state(boolean_t * icp,boolean_t * pidlep)1561 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1562 {
1563 *icp = ml_at_interrupt_context();
1564 *pidlep = (cpu_idle_count == real_ncpus);
1565 }
1566
/*
 * Routine: ml_cause_interrupt
 * Function: Generate a fake interrupt. Not implemented on this platform;
 *	the routine is a no-op.
 */
void
ml_cause_interrupt(void)
{
	return; /* BS_XXX */
}
1576
1577 /* Map memory map IO space */
1578 vm_offset_t
ml_io_map(vm_offset_t phys_addr,vm_size_t size)1579 ml_io_map(
1580 vm_offset_t phys_addr,
1581 vm_size_t size)
1582 {
1583 return io_map(phys_addr, size, VM_WIMG_IO, VM_PROT_DEFAULT, false);
1584 }
1585
1586 /* Map memory map IO space (with protections specified) */
1587 vm_offset_t
ml_io_map_with_prot(vm_offset_t phys_addr,vm_size_t size,vm_prot_t prot)1588 ml_io_map_with_prot(
1589 vm_offset_t phys_addr,
1590 vm_size_t size,
1591 vm_prot_t prot)
1592 {
1593 return io_map(phys_addr, size, VM_WIMG_IO, prot, false);
1594 }
1595
1596 vm_offset_t
ml_io_map_unmappable(vm_offset_t phys_addr,vm_size_t size,unsigned int flags)1597 ml_io_map_unmappable(
1598 vm_offset_t phys_addr,
1599 vm_size_t size,
1600 unsigned int flags)
1601 {
1602 return io_map(phys_addr, size, flags, VM_PROT_DEFAULT, true);
1603 }
1604
1605 vm_offset_t
ml_io_map_wcomb(vm_offset_t phys_addr,vm_size_t size)1606 ml_io_map_wcomb(
1607 vm_offset_t phys_addr,
1608 vm_size_t size)
1609 {
1610 return io_map(phys_addr, size, VM_WIMG_WCOMB, VM_PROT_DEFAULT, false);
1611 }
1612
1613 void
ml_io_unmap(vm_offset_t addr,vm_size_t sz)1614 ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1615 {
1616 pmap_remove(kernel_pmap, addr, addr + sz);
1617 kmem_free(kernel_map, addr, sz);
1618 }
1619
1620 vm_map_address_t
ml_map_high_window(vm_offset_t phys_addr,vm_size_t len)1621 ml_map_high_window(
1622 vm_offset_t phys_addr,
1623 vm_size_t len)
1624 {
1625 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1626 }
1627
1628 vm_offset_t
ml_static_ptovirt(vm_offset_t paddr)1629 ml_static_ptovirt(
1630 vm_offset_t paddr)
1631 {
1632 return phystokv(paddr);
1633 }
1634
1635 vm_offset_t
ml_static_slide(vm_offset_t vaddr)1636 ml_static_slide(
1637 vm_offset_t vaddr)
1638 {
1639 vm_offset_t slid_vaddr = 0;
1640
1641 {
1642 slid_vaddr = vaddr + vm_kernel_slide;
1643 }
1644
1645 if (!VM_KERNEL_IS_SLID(slid_vaddr)) {
1646 /* This is only intended for use on static kernel addresses. */
1647 return 0;
1648 }
1649
1650 return slid_vaddr;
1651 }
1652
1653 vm_offset_t
ml_static_unslide(vm_offset_t vaddr)1654 ml_static_unslide(
1655 vm_offset_t vaddr)
1656 {
1657 if (!VM_KERNEL_IS_SLID(vaddr)) {
1658 /* This is only intended for use on static kernel addresses. */
1659 return 0;
1660 }
1661
1662
1663 return vaddr - vm_kernel_slide;
1664 }
1665
1666 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1667
/*
 * Routine: ml_static_protect
 * Function: Change the protections on a page-aligned range of static
 *	kernel memory by rewriting the AP/NX/PNX bits of its page-table
 *	entries directly, then flushing the TLB for the updated range.
 *	Block and contiguous-hint mappings are only accepted when they
 *	already carry the requested protections.
 * Returns: KERN_SUCCESS, or KERN_FAILURE if an unconvertible block or
 *	hinted mapping with mismatched protections is encountered.
 * Panics on: addresses below the physmap, W+X requests, and attempts to
 *	inject executable mappings after lockdown.
 */
kern_return_t
ml_static_protect(
	vm_offset_t vaddr, /* kernel virtual address */
	vm_size_t size,
	vm_prot_t new_prot __unused)
{
	pt_entry_t arm_prot = 0;
	pt_entry_t arm_block_prot = 0;
	vm_offset_t vaddr_cur;
	ppnum_t ppn;
	kern_return_t result = KERN_SUCCESS;

	if (vaddr < physmap_base) {
		panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) physmap_base);
		return KERN_FAILURE;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): WX request on %p", (void *) vaddr);
	}
	if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
	}

	/* Set up the protection bits, and block bits so we can validate block mappings. */
	if (new_prot & VM_PROT_WRITE) {
		arm_prot |= ARM_PTE_AP(AP_RWNA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
	} else {
		arm_prot |= ARM_PTE_AP(AP_RONA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
	}

	/* User-execute is always denied for kernel static memory. */
	arm_prot |= ARM_PTE_NX;
	arm_block_prot |= ARM_TTE_BLOCK_NX;

	if (!(new_prot & VM_PROT_EXECUTE)) {
		arm_prot |= ARM_PTE_PNX;
		arm_block_prot |= ARM_TTE_BLOCK_PNX;
	}

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			tt_entry_t *tte2;
			pt_entry_t *pte_p;
			pt_entry_t ptmp;

#if XNU_MONITOR
			assert(!pmap_is_monitor(ppn));
			assert(!TEST_PAGE_RATIO_4);
#endif

			tte2 = arm_kva_to_tte(vaddr_cur);

			if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
				if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
				    ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
					/*
					 * We can support ml_static_protect on a block mapping if the mapping already has
					 * the desired protections.  We still want to run checks on a per-page basis.
					 */
					continue;
				}

				result = KERN_FAILURE;
				break;
			}

			/* Index the L3 page table for this VA to find its PTE. */
			pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
			ptmp = *pte_p;

			if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
				/*
				 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
				 * protections do not match the desired protections, then we will fail (as we cannot update
				 * this mapping without updating other mappings as well).
				 */
				result = KERN_FAILURE;
				break;
			}

			__unreachable_ok_push
			if (TEST_PAGE_RATIO_4) {
				/* 4K hardware pages backing a 16K VM page: update all four PTEs. */
				{
					unsigned int i;
					pt_entry_t *ptep_iter;

					ptep_iter = pte_p;
					for (i = 0; i < 4; i++, ptep_iter++) {
						/* Note that there is a hole in the HINT sanity checking here. */
						ptmp = *ptep_iter;

						/* We only need to update the page tables if the protections do not match. */
						if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
							ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
							*ptep_iter = ptmp;
						}
					}
				}
			} else {
				ptmp = *pte_p;
				/* We only need to update the page tables if the protections do not match. */
				if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
					ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
					*pte_p = ptmp;
				}
			}
			__unreachable_ok_pop
		}
	}

	/* Flush the TLB for whatever portion of the range was processed. */
	if (vaddr_cur > vaddr) {
		assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
		flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
	}


	return result;
}
1792
1793
/*
 * Routine: ml_static_mfree
 * Function: Release a page-aligned range of static kernel memory back to
 *	the VM: reset each page's protections to RW, hand the page to the
 *	free list via vm_page_create(), and adjust the wired/kernelcache
 *	page accounting under the page-queues lock.
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	vm_offset_t vaddr_cur;
	vm_offset_t paddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	uint32_t freed_kernelcache_pages = 0;


	/* It is acceptable (if bad) to fail to free. */
	if (vaddr < physmap_base) {
		return;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			/*
			 * It is not acceptable to fail to update the protections on a page
			 * we will release to the VM.  We need to either panic or continue.
			 * For now, we'll panic (to help flag if there is memory we can
			 * reclaim).
			 */
			if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
				panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
			}

			paddr_cur = ptoa(ppn);


			/* Give the single page [ppn, ppn+1) to the VM free list. */
			vm_page_create(ppn, (ppn + 1));
			freed_pages++;
			/* Track pages originating from the kernelcache separately. */
			if (paddr_cur >= arm_vm_kernelcache_phys_start && paddr_cur < arm_vm_kernelcache_phys_end) {
				freed_kernelcache_pages++;
			}
		}
	}
	/* Update global wired-page accounting under the page-queues lock. */
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	vm_page_kernelcache_count -= freed_kernelcache_pages;
	vm_page_unlock_queues();
#if DEBUG
	/* NOTE(review): bad_page_cnt is not declared in this view — presumably a DEBUG-only counter defined elsewhere; confirm. */
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
#endif
}
1851
1852 /*
1853 * Routine: ml_page_protection_type
1854 * Function: Returns the type of page protection that the system supports.
1855 */
1856 ml_page_protection_t
ml_page_protection_type(void)1857 ml_page_protection_type(void)
1858 {
1859 #if XNU_MONITOR
1860 return 1;
1861 #else
1862 return 0;
1863 #endif
1864 }
1865
1866 /* virtual to physical on wired pages */
1867 vm_offset_t
ml_vtophys(vm_offset_t vaddr)1868 ml_vtophys(vm_offset_t vaddr)
1869 {
1870 return kvtophys(vaddr);
1871 }
1872
1873 /*
1874 * Routine: ml_nofault_copy
1875 * Function: Perform a physical mode copy if the source and destination have
1876 * valid translations in the kernel pmap. If translations are present, they are
1877 * assumed to be wired; e.g., no attempt is made to guarantee that the
1878 * translations obtained remain valid for the duration of the copy process.
1879 */
1880 vm_size_t
ml_nofault_copy(vm_offset_t virtsrc,vm_offset_t virtdst,vm_size_t size)1881 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1882 {
1883 addr64_t cur_phys_dst, cur_phys_src;
1884 vm_size_t count, nbytes = 0;
1885
1886 while (size > 0) {
1887 if (!(cur_phys_src = kvtophys(virtsrc))) {
1888 break;
1889 }
1890 if (!(cur_phys_dst = kvtophys(virtdst))) {
1891 break;
1892 }
1893 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1894 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1895 break;
1896 }
1897 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1898 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1899 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1900 }
1901 if (count > size) {
1902 count = size;
1903 }
1904
1905 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1906
1907 nbytes += count;
1908 virtsrc += count;
1909 virtdst += count;
1910 size -= count;
1911 }
1912
1913 return nbytes;
1914 }
1915
1916 /*
1917 * Routine: ml_validate_nofault
1918 * Function: Validate that ths address range has a valid translations
1919 * in the kernel pmap. If translations are present, they are
1920 * assumed to be wired; i.e. no attempt is made to guarantee
1921 * that the translation persist after the check.
1922 * Returns: TRUE if the range is mapped and will not cause a fault,
1923 * FALSE otherwise.
1924 */
1925
1926 boolean_t
ml_validate_nofault(vm_offset_t virtsrc,vm_size_t size)1927 ml_validate_nofault(
1928 vm_offset_t virtsrc, vm_size_t size)
1929 {
1930 addr64_t cur_phys_src;
1931 uint32_t count;
1932
1933 while (size > 0) {
1934 if (!(cur_phys_src = kvtophys(virtsrc))) {
1935 return FALSE;
1936 }
1937 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1938 return FALSE;
1939 }
1940 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1941 if (count > size) {
1942 count = (uint32_t)size;
1943 }
1944
1945 virtsrc += count;
1946 size -= count;
1947 }
1948
1949 return TRUE;
1950 }
1951
1952 void
ml_get_bouncepool_info(vm_offset_t * phys_addr,vm_size_t * size)1953 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1954 {
1955 *phys_addr = 0;
1956 *size = 0;
1957 }
1958
/*
 * Notification hook invoked when real-time threads become active/inactive;
 * intentionally a no-op on this platform.
 */
void
active_rt_threads(__unused boolean_t active)
{
}
1963
1964 static void
cpu_qos_cb_default(__unused int urgency,__unused uint64_t qos_param1,__unused uint64_t qos_param2)1965 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1966 {
1967 return;
1968 }
1969
1970 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1971
1972 void
cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)1973 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1974 {
1975 if (cpu_qos_cb != NULL) {
1976 cpu_qos_update = cpu_qos_cb;
1977 } else {
1978 cpu_qos_update = cpu_qos_cb_default;
1979 }
1980 }
1981
/*
 * Notify the platform QoS layer of a scheduler urgency change, bracketed
 * by MACH_URGENCY tracepoints for scheduler debugging.
 */
void
thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
{
	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);

	/* Forward to the registered (or default no-op) QoS callback. */
	cpu_qos_update((int)urgency, rt_period, rt_deadline);

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
1991
/* Scheduler hook for the runnable-thread count; no-op on this platform. */
void
machine_run_count(__unused uint32_t count)
{
}
1996
/*
 * Machine-dependent processor selection hook: this platform applies no
 * override and returns the scheduler's candidate unchanged.
 */
processor_t
machine_choose_processor(__unused processor_set_t pset, processor_t processor)
{
	return processor;
}
2002
2003 #if KASAN
2004 vm_offset_t ml_stack_base(void);
2005 vm_size_t ml_stack_size(void);
2006
2007 vm_offset_t
ml_stack_base(void)2008 ml_stack_base(void)
2009 {
2010 uintptr_t local = (uintptr_t) &local;
2011 vm_offset_t intstack_top_ptr;
2012
2013 intstack_top_ptr = getCpuDatap()->intstack_top;
2014 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2015 return intstack_top_ptr - INTSTACK_SIZE;
2016 } else {
2017 return current_thread()->kernel_stack;
2018 }
2019 }
2020 vm_size_t
ml_stack_size(void)2021 ml_stack_size(void)
2022 {
2023 uintptr_t local = (uintptr_t) &local;
2024 vm_offset_t intstack_top_ptr;
2025
2026 intstack_top_ptr = getCpuDatap()->intstack_top;
2027 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2028 return INTSTACK_SIZE;
2029 } else {
2030 return kernel_stack_size;
2031 }
2032 }
2033 #endif
2034
2035 #ifdef CONFIG_KCOV
2036
2037 kcov_cpu_data_t *
current_kcov_data(void)2038 current_kcov_data(void)
2039 {
2040 return ¤t_cpu_datap()->cpu_kcov_data;
2041 }
2042
/*
 * Return the kcov (coverage sanitizer) per-CPU data for the given CPU id.
 */
kcov_cpu_data_t *
cpu_kcov_data(int cpuid)
{
	return &cpu_datap(cpuid)->cpu_kcov_data;
}
2048
2049 #endif /* CONFIG_KCOV */
2050
/*
 * Report whether machine-level watchdog/timeout checking is suspended;
 * never suspended on this platform.
 */
boolean_t
machine_timeout_suspended(void)
{
	return FALSE;
}
2056
/*
 * Interrupt prewarm (waking the CPU ahead of a deadline) is not supported
 * on this platform; always fails.
 */
kern_return_t
ml_interrupt_prewarm(__unused uint64_t deadline)
{
	return KERN_FAILURE;
}
2062
2063 /*
2064 * Assumes fiq, irq disabled.
2065 */
2066 void
ml_set_decrementer(uint32_t dec_value)2067 ml_set_decrementer(uint32_t dec_value)
2068 {
2069 cpu_data_t *cdp = getCpuDatap();
2070
2071 assert(ml_get_interrupts_enabled() == FALSE);
2072 cdp->cpu_decrementer = dec_value;
2073
2074 if (cdp->cpu_set_decrementer_func) {
2075 cdp->cpu_set_decrementer_func(dec_value);
2076 } else {
2077 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
2078 }
2079 }
2080
2081 /**
2082 * Perform a read of the timebase which is permitted to be executed
2083 * speculatively and/or out of program order.
2084 */
2085 static inline uint64_t
speculative_timebase(void)2086 speculative_timebase(void)
2087 {
2088 return __builtin_arm_rsr64("CNTVCT_EL0");
2089 }
2090
2091 /**
2092 * Read a non-speculative view of the timebase if one is available,
2093 * otherwise fallback on an ISB to prevent prevent speculation and
2094 * enforce ordering.
2095 */
2096 static inline uint64_t
nonspeculative_timebase(void)2097 nonspeculative_timebase(void)
2098 {
2099 #if defined(HAS_ACNTVCT)
2100 return __builtin_arm_rsr64("ACNTVCT_EL0");
2101 #elif __ARM_ARCH_8_6__
2102 return __builtin_arm_rsr64("CNTVCTSS_EL0");
2103 #else
2104 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
2105 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
2106 // to other instructions executed on the same processor."
2107 __builtin_arm_isb(ISB_SY);
2108 return speculative_timebase();
2109 #endif
2110 }
2111
2112
2113 uint64_t
ml_get_hwclock()2114 ml_get_hwclock()
2115 {
2116 uint64_t timebase = nonspeculative_timebase();
2117 return timebase;
2118 }
2119
/*
 * Return the continuous timebase: hardware clock plus the per-CPU base
 * offset. Reread until the base is stable across the clock read.
 */
uint64_t
ml_get_timebase()
{
	uint64_t clock, timebase;

	//the retry is for the case where S2R catches us in the middle of this. see rdar://77019633
	do {
		timebase = getCpuDatap()->cpu_base_timebase;
		os_compiler_barrier();
		clock = ml_get_hwclock();
		os_compiler_barrier();
	} while (getCpuDatap()->cpu_base_timebase != timebase);

	return clock + timebase;
}
2135
2136 /**
2137 * Issue a barrier that guarantees all prior memory accesses will complete
2138 * before any subsequent timebase reads.
2139 */
2140 void
ml_memory_to_timebase_fence(void)2141 ml_memory_to_timebase_fence(void)
2142 {
2143 __builtin_arm_dmb(DMB_SY);
2144 const uint64_t take_backwards_branch = 0;
2145 asm volatile (
2146 "1:"
2147 "ldr x0, [%[take_backwards_branch]]" "\n"
2148 "cbnz x0, 1b" "\n"
2149 :
2150 : [take_backwards_branch] "r"(&take_backwards_branch)
2151 : "x0"
2152 );
2153
2154 /* throwaway read to prevent ml_get_speculative_timebase() reordering */
2155 (void)ml_get_hwclock();
2156 }
2157
2158 /**
2159 * Issue a barrier that guarantees all prior timebase reads will
2160 * be ordered before any subsequent memory accesses.
2161 */
2162 void
ml_timebase_to_memory_fence(void)2163 ml_timebase_to_memory_fence(void)
2164 {
2165 __builtin_arm_isb(ISB_SY);
2166 }
2167
2168 /*
2169 * Get the speculative timebase without an ISB.
2170 */
2171 uint64_t
ml_get_speculative_timebase(void)2172 ml_get_speculative_timebase(void)
2173 {
2174 uint64_t clock, timebase;
2175
2176 //the retry is for the case where S2R catches us in the middle of this. see rdar://77019633&77697482
2177 do {
2178 timebase = getCpuDatap()->cpu_base_timebase;
2179 os_compiler_barrier();
2180 clock = speculative_timebase();
2181
2182 os_compiler_barrier();
2183 } while (getCpuDatap()->cpu_base_timebase != timebase);
2184
2185 return clock + timebase;
2186 }
2187
/*
 * Timebase source for entropy collection; speculation jitter is acceptable
 * (even desirable) here, so use the cheaper speculative read.
 */
uint64_t
ml_get_timebase_entropy(void)
{
	return ml_get_speculative_timebase();
}
2193
2194 uint32_t
ml_get_decrementer(void)2195 ml_get_decrementer(void)
2196 {
2197 cpu_data_t *cdp = getCpuDatap();
2198 uint32_t dec;
2199
2200 assert(ml_get_interrupts_enabled() == FALSE);
2201
2202 if (cdp->cpu_get_decrementer_func) {
2203 dec = cdp->cpu_get_decrementer_func();
2204 } else {
2205 uint64_t wide_val;
2206
2207 wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
2208 dec = (uint32_t)wide_val;
2209 assert(wide_val == (uint64_t)dec);
2210 }
2211
2212 return dec;
2213 }
2214
2215 boolean_t
ml_get_timer_pending(void)2216 ml_get_timer_pending(void)
2217 {
2218 uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
2219 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
2220 }
2221
/*
 * Handle the ARM platform syscall trap. The selector is in x3; codes 2/3
 * set/get the cthread (TLS) pointer, all others are ignored. Does not
 * return: exits via thread_exception_return().
 */
__attribute__((noreturn))
void
platform_syscall(arm_saved_state_t *state)
{
	uint32_t code;

#define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */

	code = (uint32_t)get_saved_state_reg(state, 3);

	KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_START,
	    get_saved_state_reg(state, 0),
	    get_saved_state_reg(state, 1),
	    get_saved_state_reg(state, 2));

	switch (code) {
	case 2:
		/* set cthread */
		platform_syscall_kprintf("set cthread self.\n");
		thread_set_cthread_self(get_saved_state_reg(state, 0));
		break;
	case 3:
		/* get cthread */
		platform_syscall_kprintf("get cthread self.\n");
		set_user_saved_state_reg(state, 0, thread_get_cthread_self());
		break;
	case 0: /* I-Cache flush (removed) */
	case 1: /* D-Cache flush (removed) */
	default:
		platform_syscall_kprintf("unknown: %d\n", code);
		break;
	}

	KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_END,
	    get_saved_state_reg(state, 0));

	thread_exception_return();
}
2260
/*
 * Enable the timebase event stream, generating a wakeup event on each
 * falling-edge transition of counter bit 'bit_index' (see CNTKCTL_EL1).
 *
 * NOTE(review): EVENTI is OR'd in without first clearing the field —
 * assumes it is zero (reset state / single configuration); confirm if
 * this is ever called to reprogram a different index.
 */
static void
_enable_timebase_event_stream(uint32_t bit_index)
{
	uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */

	if (bit_index >= 64) {
		panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
	}

	__asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));

	cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
	cntkctl |= CNTKCTL_EL1_EVNTEN;
	cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */

	/*
	 * If the SOC supports it (and it isn't broken), enable
	 * EL0 access to the timebase registers.
	 */
	if (user_timebase_type() != USER_TIMEBASE_NONE) {
		cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
	}

	__builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
}
2286
2287 /*
2288 * Turn timer on, unmask that interrupt.
2289 */
2290 static void
_enable_virtual_timer(void)2291 _enable_virtual_timer(void)
2292 {
2293 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
2294
2295 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2296 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2297 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2298 }
2299
/*
 * Per-CPU timer bring-up: enables the virtual timer (the legacy FIQ
 * argument is ignored on arm64). Must run with interrupts disabled.
 */
void
fiq_context_init(boolean_t enable_fiq __unused)
{
	/* Interrupts still disabled. */
	assert(ml_get_interrupts_enabled() == FALSE);
	_enable_virtual_timer();
}
2307
/*
 * Enable the WFE timeout event stream on this CPU using the bit index
 * previously computed by wfe_timeout_configure().
 */
void
wfe_timeout_init(void)
{
	_enable_timebase_event_stream(arm64_eventi);
}
2313
2314 /**
2315 * Configures, but does not enable, the WFE event stream. The event stream
2316 * generates an event at a set interval to act as a timeout for WFEs.
2317 *
2318 * This function sets the static global variable arm64_eventi to be the proper
2319 * bit index for the CNTKCTL_EL1.EVENTI field to generate events at the correct
2320 * period (1us unless specified by the "wfe_events_sec" boot-arg). arm64_eventi
2321 * is used by wfe_timeout_init to actually poke the registers and enable the
2322 * event stream.
2323 *
2324 * The CNTKCTL_EL1.EVENTI field contains the index of the bit of CNTVCT_EL0 that
2325 * is the trigger for the system to generate an event. The trigger can occur on
2326 * either the rising or falling edge of the bit depending on the value of
2327 * CNTKCTL_EL1.EVNTDIR. This is arbitrary for our purposes, so we use the
2328 * falling edge (1->0) transition to generate events.
2329 */
2330 void
wfe_timeout_configure(void)2331 wfe_timeout_configure(void)
2332 {
2333 /* Could fill in our own ops here, if we needed them */
2334 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
2335 uint32_t bit_index;
2336
2337 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2338 if (events_per_sec <= 0) {
2339 events_per_sec = 1;
2340 } else if (events_per_sec > USEC_PER_SEC) {
2341 events_per_sec = USEC_PER_SEC;
2342 }
2343 } else {
2344 events_per_sec = USEC_PER_SEC;
2345 }
2346 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2347 ticks_per_event = ticks_per_sec / events_per_sec;
2348
2349 /* Bit index of next power of two greater than ticks_per_event */
2350 bit_index = flsll(ticks_per_event) - 1;
2351 /* Round up to next power of two if ticks_per_event is initially power of two */
2352 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2353 bit_index++;
2354 }
2355
2356 /*
2357 * The timer can only trigger on rising or falling edge, not both; we don't
2358 * care which we trigger on, but we do need to adjust which bit we are
2359 * interested in to account for this.
2360 *
2361 * In particular, we set CNTKCTL_EL1.EVENTDIR to trigger events on the
2362 * falling edge of the given bit. Therefore, we must decrement the bit index
2363 * by one as when the bit before the one we care about makes a 1 -> 0
2364 * transition, the bit we care about makes a 0 -> 1 transition.
2365 *
2366 * For example if we want an event generated every 8 ticks (if we calculated
2367 * a bit_index of 3), we would want the event to be generated whenever the
2368 * lower four bits of the counter transition from 0b0111 -> 0b1000. We can
2369 * see that the bit at index 2 makes a falling transition in this scenario,
2370 * so we would want EVENTI to be 2 instead of 3.
2371 */
2372 if (bit_index != 0) {
2373 bit_index--;
2374 }
2375
2376 arm64_eventi = bit_index;
2377 }
2378
2379 boolean_t
ml_delay_should_spin(uint64_t interval)2380 ml_delay_should_spin(uint64_t interval)
2381 {
2382 cpu_data_t *cdp = getCpuDatap();
2383
2384 if (cdp->cpu_idle_latency) {
2385 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2386 } else {
2387 /*
2388 * Early boot, latency is unknown. Err on the side of blocking,
2389 * which should always be safe, even if slow
2390 */
2391 return FALSE;
2392 }
2393 }
2394
/* Report whether the given thread uses a 64-bit address space. */
boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}
2400
/*
 * Optional delay injected on thread yield; active only on DEVELOPMENT/DEBUG
 * kernels when the yield_delay_us tunable is nonzero.
 */
void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}
2410
/* Machine-level timer re-evaluation hook; no-op on this platform. */
void
ml_timer_evaluate(void)
{
}
2415
/* Forced timer evaluation is never in progress on this platform. */
boolean_t
ml_timer_forced_evaluation(void)
{
	return FALSE;
}
2421
2422 void
ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)2423 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2424 {
2425 /*
2426 * For now: update the resource coalition stats of the
2427 * current thread's coalition
2428 */
2429 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2430 }
2431
/* Per-thread GPU time accounting is not tracked here; always 0. */
uint64_t
ml_gpu_stat(__unused thread_t t)
{
	return 0;
}
2437
/* Out-of-line wrapper around the fast inline current-thread accessor. */
thread_t
current_thread(void)
{
	return current_thread_fast();
}
2443
2444 #if defined(HAS_APPLE_PAC)
/* Record the per-task user JOP (PAC) disable policy. */
void
ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
{
	assert(task);
	task->disable_user_jop = disable_user_jop;
}
2451
/* Set or clear the per-thread user JOP (PAC) disable machine flag. */
void
ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
{
	assert(thread);
	if (disable_user_jop) {
		thread->machine.arm_machine_flags |= ARM_MACHINE_THREAD_DISABLE_USER_JOP;
	} else {
		thread->machine.arm_machine_flags &= ~ARM_MACHINE_THREAD_DISABLE_USER_JOP;
	}
}
2462
2463 void
ml_task_set_rop_pid(task_t task,task_t parent_task,boolean_t inherit)2464 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2465 {
2466 if (inherit) {
2467 task->rop_pid = parent_task->rop_pid;
2468 } else {
2469 task->rop_pid = early_random();
2470 }
2471 }
2472
2473 /**
2474 * jop_pid may be inherited from the parent task or generated inside the shared
2475 * region. Unfortunately these two parameters are available at very different
2476 * times during task creation, so we need to split this into two steps.
2477 */
2478 void
ml_task_set_jop_pid(task_t task,task_t parent_task,boolean_t inherit)2479 ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
2480 {
2481 if (inherit) {
2482 task->jop_pid = parent_task->jop_pid;
2483 } else {
2484 task->jop_pid = ml_default_jop_pid();
2485 }
2486 }
2487
/*
 * Derive the task's JOP diversifier from its shared region key. Falls back
 * to a random value when the task is already being torn down.
 */
void
ml_task_set_jop_pid_from_shared_region(task_t task)
{
	vm_shared_region_t sr = vm_shared_region_get(task);
	/*
	 * If there's no shared region, we can assign the key arbitrarily. This
	 * typically happens when Mach-O image activation failed part of the way
	 * through, and this task is in the middle of dying with SIGKILL anyway.
	 */
	if (__improbable(!sr)) {
		task->jop_pid = early_random();
		return;
	}
	/* Only needed the existence check; drop the reference immediately. */
	vm_shared_region_deallocate(sr);

	/*
	 * Similarly we have to worry about jetsam having killed the task and
	 * already cleared the shared_region_id.
	 */
	task_lock(task);
	if (task->shared_region_id != NULL) {
		task->jop_pid = shared_region_find_key(task->shared_region_id);
	} else {
		task->jop_pid = early_random();
	}
	task_unlock(task);
}
2515
/* Copy the owning task's JOP diversifier into the thread's machine state. */
void
ml_thread_set_jop_pid(thread_t thread, task_t task)
{
	thread->machine.jop_pid = task->jop_pid;
}
2521 #endif /* defined(HAS_APPLE_PAC) */
2522
2523 #if DEVELOPMENT || DEBUG
2524 static uint64_t minor_badness_suffered = 0;
2525 #endif
2526 void
ml_report_minor_badness(uint32_t __unused badness_id)2527 ml_report_minor_badness(uint32_t __unused badness_id)
2528 {
2529 #if DEVELOPMENT || DEBUG
2530 (void)os_atomic_or(&minor_badness_suffered, 1ULL << badness_id, relaxed);
2531 #endif
2532 }
2533
2534 #if defined(HAS_APPLE_PAC)
2535 #if __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM
2536 /**
2537 * The ARMv8.6 implementation is also safe for non-FPAC CPUs, but less efficient;
2538 * guest kernels need to use it because it does not know at compile time whether
2539 * the host CPU supports FPAC.
2540 */
2541
2542 /**
2543 * Emulates the poisoning done by ARMv8.3-PAuth instructions on auth failure.
2544 */
2545 static void *
ml_poison_ptr(void * ptr,ptrauth_key key)2546 ml_poison_ptr(void *ptr, ptrauth_key key)
2547 {
2548 bool b_key = key & (1ULL << 0);
2549 uint64_t error_code;
2550 if (b_key) {
2551 error_code = 2;
2552 } else {
2553 error_code = 1;
2554 }
2555
2556 bool kernel_pointer = (uintptr_t)ptr & (1ULL << 55);
2557 bool data_key = key & (1ULL << 1);
2558 /* When PAC is enabled, only userspace data pointers use TBI, regardless of boot parameters */
2559 bool tbi = data_key && !kernel_pointer;
2560 unsigned int poison_shift;
2561 if (tbi) {
2562 poison_shift = 53;
2563 } else {
2564 poison_shift = 61;
2565 }
2566
2567 uintptr_t poisoned = (uintptr_t)ptr;
2568 poisoned &= ~(3ULL << poison_shift);
2569 poisoned |= error_code << poison_shift;
2570 return (void *)poisoned;
2571 }
2572
2573 /*
2574 * ptrauth_sign_unauthenticated() reimplemented using asm volatile, forcing the
2575 * compiler to assume this operation has side-effects and cannot be reordered
2576 */
/*
 * ptrauth_sign_unauthenticated() reimplemented using asm volatile, forcing the
 * compiler to assume this operation has side-effects and cannot be reordered.
 */
#define ptrauth_sign_volatile(__value, __suffix, __data)                \
	({                                                              \
	        void *__ret = __value;                                  \
	        asm volatile (                                          \
	                "pac" #__suffix " %[value], %[data]"            \
	                : [value] "+r"(__ret)                           \
	                : [data] "r"(__data)                            \
	        );                                                      \
	        __ret;                                                  \
	})

/*
 * Strip-and-resign emulation of "aut": if re-signing the stripped pointer
 * reproduces the original signature, the pointer authenticates and the
 * stripped value is returned; otherwise it is poisoned via ml_poison_ptr().
 */
#define ml_auth_ptr_unchecked_for_key(_ptr, _suffix, _key, _modifier)                           \
	do {                                                                                    \
	        void *stripped = ptrauth_strip(_ptr, _key);                                     \
	        void *reauthed = ptrauth_sign_volatile(stripped, _suffix, _modifier);           \
	        if (__probable(_ptr == reauthed)) {                                             \
	                _ptr = stripped;                                                        \
	        } else {                                                                        \
	                _ptr = ml_poison_ptr(stripped, _key);                                   \
	        }                                                                               \
	} while (0)

#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	ml_auth_ptr_unchecked_for_key(_ptr, _suffix, ptrauth_key_as ## _suffix, _modifier)
#else
/* On FPAC-less ARMv8.3 hardware a plain "aut" never traps, so use it directly. */
#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
#endif /* __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM */
2605
2606 /**
2607 * Authenticates a signed pointer without trapping on failure.
2608 *
2609 * @warning This function must be called with interrupts disabled.
2610 *
2611 * @warning Pointer authentication failure should normally be treated as a fatal
2612 * error. This function is intended for a handful of callers that cannot panic
2613 * on failure, and that understand the risks in handling a poisoned return
2614 * value. Other code should generally use the trapping variant
2615 * ptrauth_auth_data() instead.
2616 *
2617 * @param ptr the pointer to authenticate
2618 * @param key which key to use for authentication
2619 * @param modifier a modifier to mix into the key
2620 * @return an authenticated version of ptr, possibly with poison bits set
2621 */
void *
ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
{
	/* Dispatch to the matching "aut" variant; key & 0x3 covers all four keys. */
	switch (key & 0x3) {
	case ptrauth_key_asia:
		_ml_auth_ptr_unchecked(ptr, ia, modifier);
		break;
	case ptrauth_key_asib:
		_ml_auth_ptr_unchecked(ptr, ib, modifier);
		break;
	case ptrauth_key_asda:
		_ml_auth_ptr_unchecked(ptr, da, modifier);
		break;
	case ptrauth_key_asdb:
		_ml_auth_ptr_unchecked(ptr, db, modifier);
		break;
	}

	return ptr;
}
2642 #endif /* defined(HAS_APPLE_PAC) */
2643
2644 #ifdef CONFIG_XNUPOST
/*
 * (XNUPOST only) Arm an expected-fault window keyed on a faulting data
 * address; clears any expected-PC match.
 */
void
ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
{
	thread_t thread = current_thread();
	thread->machine.expected_fault_handler = expected_fault_handler;
	thread->machine.expected_fault_addr = expected_fault_addr;
	thread->machine.expected_fault_pc = 0;
}
2653
2654 /** Expect an exception to be thrown at EXPECTED_FAULT_PC */
void
ml_expect_fault_pc_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_pc)
{
	thread_t thread = current_thread();
	thread->machine.expected_fault_handler = expected_fault_handler;
	thread->machine.expected_fault_addr = 0;
	/* Strip the PAC signature so the PC compares against raw fault PCs. */
	uintptr_t raw_func = (uintptr_t)ptrauth_strip(
		(void *)expected_fault_pc,
		ptrauth_key_function_pointer);
	thread->machine.expected_fault_pc = raw_func;
}
2666
/* (XNUPOST only) Disarm the expected-fault window on the current thread. */
void
ml_expect_fault_end(void)
{
	thread_t thread = current_thread();
	thread->machine.expected_fault_handler = NULL;
	thread->machine.expected_fault_addr = 0;
	thread->machine.expected_fault_pc = 0;
}
2675 #endif /* CONFIG_XNUPOST */
2676
/*
 * Early machine hook on wake-from-hibernation: rebuild VM structures
 * before they are used. No-op on non-HIBERNATION configurations.
 */
void
ml_hibernate_active_pre(void)
{
#if HIBERNATION
	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
		/* Restore VM bookkeeping torn down across hibernation. */
		hibernate_rebuild_vm_structs();
	}
#endif /* HIBERNATION */
}
2687
/*
 * Late machine hook on wake-from-hibernation: finish machine init, release
 * the VM lock taken for hibernation, and clear the per-CPU hibernate flag.
 */
void
ml_hibernate_active_post(void)
{
#if HIBERNATION
	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
		hibernate_machine_init();
		hibernate_vm_lock_end();
		current_cpu_datap()->cpu_hibernate = 0;
	}
#endif /* HIBERNATION */
}
2699
2700 /**
2701 * Return back a machine-dependent array of address space regions that should be
2702 * reserved by the VM (pre-mapped in the address space). This will prevent user
2703 * processes from allocating or deallocating from within these regions.
2704 *
2705 * @param vm_is64bit True if the process has a 64-bit address space.
2706 * @param regions An out parameter representing an array of regions to reserve.
2707 *
2708 * @return The number of reserved regions returned through `regions`.
2709 */
2710 size_t
ml_get_vm_reserved_regions(bool vm_is64bit,const struct vm_reserved_region ** regions)2711 ml_get_vm_reserved_regions(bool vm_is64bit, const struct vm_reserved_region **regions)
2712 {
2713 assert(regions != NULL);
2714
2715 /**
2716 * Reserved regions only apply to 64-bit address spaces. This is because
2717 * we only expect to grow the maximum user VA address on 64-bit address spaces
2718 * (we've essentially already reached the max for 32-bit spaces). The reserved
2719 * regions should safely fall outside of the max user VA for 32-bit processes.
2720 */
2721 if (vm_is64bit) {
2722 *regions = vm_reserved_regions;
2723 return ARRAY_COUNT(vm_reserved_regions);
2724 } else {
2725 /* Don't reserve any VA regions on arm64_32 processes. */
2726 *regions = NULL;
2727 return 0;
2728 }
2729 }
2730
/* Per-cluster WFE timeout recommendations (abstime intervals).
 * These WFE recommendations are expected to be updated on a relatively
 * infrequent cadence, possibly from a different cluster, hence
 * false cacheline sharing isn't expected to be material
 */
static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2736
/*
 * Publish a new WFE timeout recommendation for a cluster. Readers race
 * with this store by design (relaxed atomic). Returns 0 on success.
 */
uint32_t
ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
{
	assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
	assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
	os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
	return 0; /* Success */
}
2745
#if DEVELOPMENT || DEBUG
/* Debug tunables consulted by ml_cluster_wfe_timeout(). */
int wfe_rec_max = 0;               /* nonzero: take the max recommendation across all clusters */
int wfe_rec_none = 0;              /* nonzero: force a zero (disabled) WFE timeout */
uint64_t wfe_rec_override_mat = 0; /* nonzero: unconditionally override the timeout */
uint64_t wfe_rec_clamp = 0;        /* nonzero: clamp the timeout to this value */
#endif
2752
2753 uint64_t
ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)2754 ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2755 {
2756 /* This and its consumer does not synchronize vis-a-vis updates
2757 * of the recommendation; races are acceptable.
2758 */
2759 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2760 #if DEVELOPMENT || DEBUG
2761 if (wfe_rec_clamp) {
2762 wfet = MIN(wfe_rec_clamp, wfet);
2763 }
2764
2765 if (wfe_rec_max) {
2766 for (int i = 0; i < MAX_CPU_CLUSTERS; i++) {
2767 if (arm64_cluster_wfe_recs[i] > wfet) {
2768 wfet = arm64_cluster_wfe_recs[i];
2769 }
2770 }
2771 }
2772
2773 if (wfe_rec_none) {
2774 wfet = 0;
2775 }
2776
2777 if (wfe_rec_override_mat) {
2778 wfet = wfe_rec_override_mat;
2779 }
2780 #endif
2781 return wfet;
2782 }
2783
/*
 * Report whether 'addr' lies in a non-XNU stack (the PPL stacks when
 * XNU_MONITOR is built); always false otherwise.
 */
__pure2 bool
ml_addr_in_non_xnu_stack(__unused uintptr_t addr)
{
#if XNU_MONITOR
	return (addr >= (uintptr_t)pmap_stacks_start) && (addr < (uintptr_t)pmap_stacks_end);
#else
	return false;
#endif /* XNU_MONITOR */
}
2793
/*
 * Return the PC to use when backtracing from the given (64-bit) saved state.
 */
uint64_t
ml_get_backtrace_pc(struct arm_saved_state *state)
{
	assert((state != NULL) && is_saved_state64(state));


	return get_saved_state_pc(state);
}
2802
2803
2804 /**
2805 * Panic because an ARM saved-state accessor expected user saved-state but was
2806 * passed non-user saved-state.
2807 *
2808 * @param ss invalid saved-state (CPSR.M != EL0)
2809 */
void
ml_panic_on_invalid_old_cpsr(const arm_saved_state_t *ss)
{
	/* Unconditional: reaching here means a saved-state invariant was violated. */
	panic("invalid CPSR in user saved-state %p", ss);
}
2815
2816 /**
2817 * Panic because an ARM saved-state accessor was passed user saved-state and
2818 * asked to assign a non-user CPSR.
2819 *
2820 * @param ss original EL0 saved-state
2821 * @param cpsr invalid new CPSR value (CPSR.M != EL0)
2822 */
void
ml_panic_on_invalid_new_cpsr(const arm_saved_state_t *ss, uint32_t cpsr)
{
	/* Unconditional: reaching here means a saved-state invariant was violated. */
	panic("attempt to set non-user CPSR %#010x on user saved-state %p", cpsr, ss);
}
2828