/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/machine_routines.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <mach/processor.h>
#include <kern/processor.h>
#include <kern/machine.h>

#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/policy_internal.h>

#include <prng/random.h>
#include <prng/entropy.h>
#include <i386/machine_cpu.h>
#include <i386/lapic.h>
#include <i386/bit_routines.h>
#include <i386/mp_events.h>
#include <i386/pmCPU.h>
#include <i386/trap.h>
#include <i386/tsc.h>
#include <i386/cpu_threads.h>
#include <i386/proc_reg.h>
#include <mach/vm_param.h>
#include <i386/pmap.h>
#include <i386/pmap_internal.h>
#include <i386/misc_protos.h>
#include <kern/timer_queue.h>
#include <vm/vm_map.h>
#if KPC
#include <kern/kpc.h>
#endif
#include <architecture/i386/pio.h>
#include <i386/cpu_data.h>
#if DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

extern void wakeup(void *);

uint64_t LockTimeOut;
uint64_t TLBTimeOut;
uint64_t LockTimeOutTSC;
uint32_t LockTimeOutUsec;
uint64_t MutexSpin;
uint64_t low_MutexSpin;
int64_t high_MutexSpin;
uint64_t LastDebuggerEntryAllowance;
uint64_t delay_spin_threshold;

extern uint64_t panic_restart_timeout;

boolean_t virtualized = FALSE;

static SIMPLE_LOCK_DECLARE(ml_timer_evaluation_slock, 0);
uint32_t ml_timer_eager_evaluations;
uint64_t ml_timer_eager_evaluation_max;
static boolean_t ml_timer_evaluation_in_progress = FALSE;

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
static int max_cpus_initialized = 0;
#define MAX_CPUS_SET 0x1
#define MAX_CPUS_WAIT 0x2

/* IO memory map services */

/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO, VM_PROT_DEFAULT, false);
}

vm_offset_t
ml_io_map_wcomb(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_WCOMB, VM_PROT_DEFAULT, false);
}

vm_offset_t
ml_io_map_unmappable(
	vm_offset_t phys_addr,
	vm_size_t size,
	unsigned int flags)
{
	return io_map(phys_addr, size, flags, VM_PROT_DEFAULT, true);
}

void
ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
{
	*phys_addr = 0;
	*size = 0;
}


vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
#if defined(__x86_64__)
	return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
#else
	return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
#endif
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	return vaddr + vm_kernel_slide;
}

/*
 * base must be page-aligned, and size must be a multiple of PAGE_SIZE
 */
kern_return_t
ml_static_verify_page_protections(
	uint64_t base, uint64_t size, vm_prot_t prot)
{
	vm_prot_t pageprot;
	uint64_t offset;

	DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot);

	/*
	 * base must be within the static bounds, defined to be:
	 * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr)
	 */
#if DEVELOPMENT || DEBUG || KASAN
	assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr);
#else /* On release kernels, assume this is a protection mismatch failure. */
	if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) {
		return KERN_FAILURE;
	}
#endif

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) {
			return KERN_FAILURE;
		}
		if ((pageprot & prot) != prot) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
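
/*
 * Illustrative sketch (not part of the original source; base/size here are
 * hypothetical): a caller that expects part of the static kernel image to be
 * read-only could confirm the permissions before relying on them, e.g.
 *
 *	if (ml_static_verify_page_protections(base, PAGE_SIZE,
 *	        VM_PROT_READ) != KERN_SUCCESS) {
 *		panic("static region lost expected protections");
 *	}
 *
 * The only requirements are that base be page-aligned, size be a multiple of
 * PAGE_SIZE, and the range fall inside the static kernel bounds checked above.
 */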

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	return vaddr - vm_kernel_slide;
}

/*
 * Reclaim memory, by virtual address, that was used in early boot and is no
 * longer needed by the kernel.
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	addr64_t vaddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	vm_size_t map_size;

	assert(vaddr >= VM_MIN_KERNEL_ADDRESS);

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
		map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);

		/* just skip if nothing mapped here */
		if (map_size == 0) {
			vaddr_cur += PAGE_SIZE;
			continue;
		}

		/*
		 * Can't free from the middle of a large page.
		 */
		assert((vaddr_cur & (map_size - 1)) == 0);

		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		assert(ppn != (ppnum_t)NULL);

		pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
		while (map_size > 0) {
			assert(pmap_valid_page(ppn));
			if (IS_MANAGED_PAGE(ppn)) {
				vm_page_create(ppn, (ppn + 1));
				freed_pages++;
			}
			map_size -= PAGE_SIZE;
			vaddr_cur += PAGE_SIZE;
			ppn++;
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	if (vm_page_wire_count_on_boot != 0) {
		assert(vm_page_wire_count_on_boot >= freed_pages);
		vm_page_wire_count_on_boot -= freed_pages;
	}
	vm_page_unlock_queues();

#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}
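
/*
 * Hypothetical usage sketch (not taken from the original source; the names
 * table_paddr/table_size are illustrative): a boot-time table that is no
 * longer needed could be returned to the VM system with
 *
 *	ml_static_mfree(ml_static_ptovirt(table_paddr), table_size);
 *
 * assuming the range is page-aligned. After the call the pages are unmapped
 * from the kernel pmap and handed to the free list via vm_page_create(), so
 * the caller must not touch them again.
 */
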
257
258 /* Change page protections for addresses previously loaded by efiboot */
259 kern_return_t
ml_static_protect(vm_offset_t vmaddr,vm_size_t size,vm_prot_t prot)260 ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot)
261 {
262 boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE);
263
264 assert(prot & VM_PROT_READ);
265
266 pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro);
267
268 return KERN_SUCCESS;
269 }
270
271 /* virtual to physical on wired pages */
272 vm_offset_t
ml_vtophys(vm_offset_t vaddr)273 ml_vtophys(
274 vm_offset_t vaddr)
275 {
276 return (vm_offset_t)kvtophys(vaddr);
277 }
278
279 /*
280 * Routine: ml_nofault_copy
281 * Function: Perform a physical mode copy if the source and
282 * destination have valid translations in the kernel pmap.
283 * If translations are present, they are assumed to
284 * be wired; i.e. no attempt is made to guarantee that the
285 * translations obtained remained valid for
286 * the duration of the copy process.
287 */
288
289 vm_size_t
ml_nofault_copy(vm_offset_t virtsrc,vm_offset_t virtdst,vm_size_t size)290 ml_nofault_copy(
291 vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
292 {
293 addr64_t cur_phys_dst, cur_phys_src;
294 uint32_t count, nbytes = 0;
295
296 while (size > 0) {
297 if (!(cur_phys_src = kvtophys(virtsrc))) {
298 break;
299 }
300 if (!(cur_phys_dst = kvtophys(virtdst))) {
301 break;
302 }
303 if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
304 break;
305 }
306 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
307 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
308 count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
309 }
310 if (count > size) {
311 count = (uint32_t)size;
312 }
313
314 bcopy_phys(cur_phys_src, cur_phys_dst, count);
315
316 nbytes += count;
317 virtsrc += count;
318 virtdst += count;
319 size -= count;
320 }
321
322 return nbytes;
323 }
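
/*
 * Sketch of how a caller might use this (illustrative only; src_va, dst_va
 * and len are hypothetical names): because the copy stops at the first page
 * without a valid kernel translation, the return value must be checked rather
 * than assumed to equal the requested length:
 *
 *	vm_size_t copied = ml_nofault_copy(src_va, dst_va, len);
 *	if (copied != len) {
 *		// part of the range had no valid translation; handle the
 *		// short copy instead of faulting on the missing pages
 *	}
 */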

/*
 * Routine: ml_validate_nofault
 * Function: Validate that this address range has valid translations
 *           in the kernel pmap. If translations are present, they are
 *           assumed to be wired; i.e. no attempt is made to guarantee
 *           that the translations persist after the check.
 * Returns: TRUE if the range is mapped and will not cause a fault,
 *          FALSE otherwise.
 */

boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
	addr64_t cur_phys_src;
	uint32_t count;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			return FALSE;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_src))) {
			return FALSE;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > size) {
			count = (uint32_t)size;
		}

		virtsrc += count;
		size -= count;
	}

	return TRUE;
}

/* Interrupt handling */

/* Initialize Interrupts */
void
ml_init_interrupt(void)
{
	(void) ml_set_interrupts_enabled(TRUE);
}


/* Get Interrupts Enabled */
boolean_t
ml_get_interrupts_enabled(void)
{
	unsigned long flags;

	__asm__ volatile ("pushf; pop %0": "=r" (flags));
	return (flags & EFL_IF) != 0;
}

/* Set Interrupts Enabled */
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
	unsigned long flags;
	boolean_t istate;

	__asm__ volatile ("pushf; pop %0" : "=r" (flags));

	assert(get_interrupt_level() ? (enable == FALSE) : TRUE);

	istate = ((flags & EFL_IF) != 0);

	if (enable) {
		__asm__ volatile ("sti;nop");

		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	} else {
		if (istate) {
			__asm__ volatile ("cli");
		}
	}

	return istate;
}
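
/*
 * The return value is the previous interrupt state, which enables the common
 * save/disable/restore idiom used elsewhere in this file (see
 * ml_install_interrupt_handler() below):
 *
 *	boolean_t istate = ml_set_interrupts_enabled(FALSE);
 *	// ... short critical section ...
 *	(void) ml_set_interrupts_enabled(istate);
 */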

/* Early Set Interrupts Enabled */
boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
	if (enable == TRUE) {
		kprintf("Caller attempted to enable interrupts too early in "
		    "kernel startup. Halting.\n");
		hlt();
		/*NOTREACHED*/
	}

	/* On x86, do not allow interrupts to be enabled very early */
	return FALSE;
}

/* Check if running at interrupt context */
boolean_t
ml_at_interrupt_context(void)
{
	return get_interrupt_level() != 0;
}

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = (get_interrupt_level() != 0);
	/* These will be technically inaccurate for interrupts that occur
	 * successively within a single "idle exit" event, but shouldn't
	 * matter statistically.
	 */
	*pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
}

/* Generate a fake interrupt */
__dead2
void
ml_cause_interrupt(void)
{
	panic("ml_cause_interrupt not defined yet on Intel");
}

/*
 * TODO: transition users of this to kernel_thread_start_priority
 * ml_thread_policy is an unsupported KPI
 */
void
ml_thread_policy(
	thread_t thread,
	__unused unsigned policy_id,
	unsigned policy_info)
{
	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
		thread_precedence_policy_data_t info;
		__assert_only kern_return_t kret;

		info.importance = 1;

		kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
		    (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		assert(kret == KERN_SUCCESS);
	}
}

/* Initialize Interrupts */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);

	PE_install_interrupt_handler(nub, source, target,
	    (IOInterruptHandler) handler, refCon);

	(void) ml_set_interrupts_enabled(current_state);
}


void
machine_signal_idle(
	processor_t processor)
{
	cpu_interrupt(processor->cpu_id);
}

__dead2
void
machine_signal_idle_deferred(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

__dead2
void
machine_signal_idle_cancel(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

static kern_return_t
register_cpu(
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu )
{
	int target_cpu;
	cpu_data_t *this_cpu_datap;

	this_cpu_datap = cpu_data_alloc(boot_cpu);
	if (this_cpu_datap == NULL) {
		return KERN_FAILURE;
	}
	target_cpu = this_cpu_datap->cpu_number;
	assert((boot_cpu && (target_cpu == 0)) ||
	    (!boot_cpu && (target_cpu != 0)));

	lapic_cpu_map(lapic_id, target_cpu);

	/* The cpu_id is not known at registration phase. Just do
	 * lapic_id for now
	 */
	this_cpu_datap->cpu_phys_number = lapic_id;

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto failed;
	}
#endif

	if (!boot_cpu) {
		cpu_thread_alloc(this_cpu_datap->cpu_number);
		if (this_cpu_datap->lcpu.core == NULL) {
			goto failed;
		}
	}

	/*
	 * processor_init() deferred to topology start
	 * because "slot numbers" a.k.a. logical processor numbers
	 * are not yet finalized.
	 */
	*processor_out = this_cpu_datap->cpu_processor;

	return KERN_SUCCESS;

failed:
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */

	return KERN_FAILURE;
}


kern_return_t
ml_processor_register(
	cpu_id_t cpu_id,
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu,
	boolean_t start )
{
	static boolean_t done_topo_sort = FALSE;
	static uint32_t num_registered = 0;

	/* Register all CPUs first, and track max */
	if (start == FALSE) {
		num_registered++;

		DBG( "registering CPU lapic id %d\n", lapic_id );

		return register_cpu( lapic_id, processor_out, boot_cpu );
	}

	/* Sort by topology before we start anything */
	if (!done_topo_sort) {
		DBG( "about to start CPUs. %d registered\n", num_registered );

		cpu_topology_sort( num_registered );
		done_topo_sort = TRUE;
	}

	/* Assign the cpu ID */
	uint32_t cpunum = -1;
	cpu_data_t *this_cpu_datap = NULL;

	/* find cpu num and pointer */
	cpunum = ml_get_cpuid( lapic_id );

	if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
		panic( "trying to start invalid/unregistered CPU %d", lapic_id );
	}

	this_cpu_datap = cpu_datap(cpunum);

	/* fix the CPU id */
	this_cpu_datap->cpu_id = cpu_id;

	/* allocate and initialize other per-cpu structures */
	if (!boot_cpu) {
		mp_cpus_call_cpu_init(cpunum);
		random_cpu_init(cpunum);
	}

	/* output arg */
	*processor_out = this_cpu_datap->cpu_processor;

	/* OK, try and start this CPU */
	return cpu_topology_start_cpu( cpunum );
}


void
ml_cpu_get_info_type(ml_cpu_info_t *cpu_infop, cluster_type_t cluster_type __unused)
{
	boolean_t os_supports_sse;
	i386_cpu_info_t *cpuid_infop;

	if (cpu_infop == NULL) {
		return;
	}

	/*
	 * Are we supporting MMX/SSE/SSE2/SSE3?
	 * As distinct from whether the cpu has these capabilities.
	 */
	os_supports_sse = !!(get_cr4() & CR4_OSXMM);

	if (ml_fpu_avx_enabled()) {
		cpu_infop->vector_unit = 9;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
		cpu_infop->vector_unit = 8;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
		cpu_infop->vector_unit = 7;
	} else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 6;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 5;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
		cpu_infop->vector_unit = 4;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
		cpu_infop->vector_unit = 3;
	} else if (cpuid_features() & CPUID_FEATURE_MMX) {
		cpu_infop->vector_unit = 2;
	} else {
		cpu_infop->vector_unit = 0;
	}

	cpuid_infop = cpuid_info();

	cpu_infop->cache_line_size = cpuid_infop->cache_linesize;

	cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
	cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];

	if (cpuid_infop->cache_size[L2U] > 0) {
		cpu_infop->l2_settings = 1;
		cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
	} else {
		cpu_infop->l2_settings = 0;
		cpu_infop->l2_cache_size = 0xFFFFFFFF;
	}

	if (cpuid_infop->cache_size[L3U] > 0) {
		cpu_infop->l3_settings = 1;
		cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
	} else {
		cpu_infop->l3_settings = 0;
		cpu_infop->l3_cache_size = 0xFFFFFFFF;
	}
}

/*
 * Routine: ml_cpu_get_info
 * Function: Fill out the ml_cpu_info_t structure with parameters associated
 * with the boot cluster.
 */
void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
{
	ml_cpu_get_info_type(ml_cpu_info, CLUSTER_TYPE_SMP);
}

unsigned int
ml_get_cpu_number_type(cluster_type_t cluster_type __unused, bool logical, bool available)
{
	/*
	 * At present no supported x86 system features more than 1 CPU type. Because
	 * of this, the cluster_type parameter is ignored.
	 */
	if (logical && available) {
		return machine_info.logical_cpu;
	} else if (logical && !available) {
		return machine_info.logical_cpu_max;
	} else if (!logical && available) {
		return machine_info.physical_cpu;
	} else {
		return machine_info.physical_cpu_max;
	}
}

void
ml_get_cluster_type_name(cluster_type_t cluster_type __unused, char *name, size_t name_size)
{
	strlcpy(name, "Standard", name_size);
}

unsigned int
ml_get_cluster_number_type(cluster_type_t cluster_type __unused)
{
	/*
	 * At present no supported x86 system has more than 1 CPU type and multiple
	 * clusters.
	 */
	return 1;
}

unsigned int
ml_get_cpu_types(void)
{
	return 1 << CLUSTER_TYPE_SMP;
}

int
ml_early_cpu_max_number(void)
{
	int n = max_ncpus;

	assert(startup_phase >= STARTUP_SUB_TUNABLES);
	if (max_cpus_from_firmware) {
		n = MIN(n, max_cpus_from_firmware);
	}
	return n - 1;
}

void
ml_set_max_cpus(unsigned int max_cpus)
{
	lck_mtx_lock(&max_cpus_lock);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
			/*
			 * Note: max_cpus is the number of enabled processors
			 * that ACPI found; max_ncpus is the maximum number
			 * that the kernel supports or that the "cpus="
			 * boot-arg has set. Here we take the minimum.
			 */
			machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
		}
		if (max_cpus_initialized == MAX_CPUS_WAIT) {
			thread_wakeup((event_t) &max_cpus_initialized);
		}
		max_cpus_initialized = MAX_CPUS_SET;
	}
	lck_mtx_unlock(&max_cpus_lock);
}

unsigned int
ml_wait_max_cpus(void)
{
	lck_mtx_lock(&max_cpus_lock);
	while (max_cpus_initialized != MAX_CPUS_SET) {
		max_cpus_initialized = MAX_CPUS_WAIT;
		lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
	}
	lck_mtx_unlock(&max_cpus_lock);
	return machine_info.max_cpus;
}
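
/*
 * The two routines above form a simple handshake: ml_wait_max_cpus() marks
 * the state MAX_CPUS_WAIT and sleeps on &max_cpus_initialized, while
 * ml_set_max_cpus() (called once ACPI has enumerated the enabled processors)
 * publishes machine_info.max_cpus, wakes any waiter, and latches the state to
 * MAX_CPUS_SET so that later callers of ml_wait_max_cpus() return immediately.
 */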

void
ml_panic_trap_to_debugger(__unused const char *panic_format_str,
    __unused va_list *panic_args,
    __unused unsigned int reason,
    __unused void *ctx,
    __unused uint64_t panic_options_mask,
    __unused unsigned long panic_caller)
{
	return;
}

static uint64_t
virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
	if (vti >= 64) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	if ((timeout << vti) > max_timeout) {
		return max_timeout;
	}

	return timeout << vti;
}
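
/*
 * In other words, the timeout is scaled by 2^vti with saturation: with the
 * default vti of 6 (see ml_init_lock_timeout() below) a timeout grows 64x,
 * and any shift that would overflow 64 bits or exceed max_timeout is clamped
 * to max_timeout.
 */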

static uint32_t
virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout)
{
	if (vti >= 32) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	return timeout << vti;
}

/*
 * Some timeouts are later adjusted or used in calculations setting
 * other values. In order to avoid overflow, cap the max timeout as
 * 2^47ns (~39 hours).
 */
static const uint64_t max_timeout_ns = 1ULL << 47;

/*
 * Inflate a timeout in absolutetime.
 */
static uint64_t
virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout)
{
	uint64_t max_timeout;
	nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a value in TSC ticks.
 */
static uint64_t
virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout)
{
	const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a timeout in microseconds.
 */
static uint32_t
virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout)
{
	const uint32_t max_timeout = ~0;
	return virtual_timeout_inflate32(vti, timeout, max_timeout);
}

uint64_t
ml_get_timebase_entropy(void)
{
	return __builtin_ia32_rdtsc();
}

/*
 * Routine: ml_init_lock_timeout
 * Function:
 */
static void __startup_func
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint32_t mtxspin;
#if DEVELOPMENT || DEBUG
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
#else
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
#endif
	uint32_t slto;
	uint32_t prt;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	/*
	 * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks,
	 * and LockTimeOutUsec is in microseconds and it's 32-bits.
	 */
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOut = abstime;
	LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);

	/*
	 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
	 * LockTimeOut but can be overridden separately. In particular, a
	 * zero value inhibits the timeout-panic and cuts a trace event instead
	 * - see pmap_flush_tlbs().
	 */
	if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		TLBTimeOut = (uint32_t) abstime;
	} else {
		TLBTimeOut = LockTimeOut;
	}

#if DEVELOPMENT || DEBUG
	report_phy_read_delay = LockTimeOut >> 1;
#endif
	if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		report_phy_read_delay = abstime;
	}

	if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		report_phy_write_delay = abstime;
	}

	if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		trace_phy_read_delay = abstime;
	}

	if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		trace_phy_write_delay = abstime;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = (unsigned int)abstime;
	low_MutexSpin = MutexSpin;
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 */
	high_MutexSpin = -1;

	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
	}

	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
	if (virtualized) {
		unsigned int vti;

		if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
			vti = 6;
		}

#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout)                  \
MACRO_BEGIN                                                    \
	_timeout = virtual_timeout_inflate_abs(vti, _timeout); \
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout)                  \
MACRO_BEGIN                                                    \
	_timeout = virtual_timeout_inflate_tsc(vti, _timeout); \
MACRO_END
#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout)                   \
MACRO_BEGIN                                                    \
	_timeout = virtual_timeout_inflate_us(vti, _timeout);  \
MACRO_END
		/*
		 * These timeout values are inflated because they cause
		 * the kernel to panic when they expire.
		 * (Needed when running as a guest VM as the host OS
		 * may not always schedule vcpu threads in time to
		 * meet the deadline implied by the narrower time
		 * window used on hardware.)
		 */
		VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec);
		VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC);
		VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_ABS(report_phy_read_delay);
		VIRTUAL_TIMEOUT_INFLATE_TSC(lock_panic_timeout);
	}

	interrupt_latency_tracker_setup();
}
STARTUP(TIMEOUTS, STARTUP_RANK_MIDDLE, ml_init_lock_timeout);

/*
 * Threshold above which we should attempt to block
 * instead of spinning for clock_delay_until().
 */

void
ml_init_delay_spin_threshold(int threshold_us)
{
	nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}

boolean_t
ml_delay_should_spin(uint64_t interval)
{
	return (interval < delay_spin_threshold) ? TRUE : FALSE;
}
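
/*
 * Sketch of the intended decision (the caller named above is
 * clock_delay_until(); the control flow below is illustrative, not copied
 * from it): a requested delay shorter than delay_spin_threshold is cheaper
 * to busy-wait than to block and take a wakeup, so a caller would do roughly:
 *
 *	if (ml_delay_should_spin(interval)) {
 *		// spin until the deadline passes
 *	} else {
 *		// arm a timer and block
 *	}
 */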

TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates. Defer to cpu_thread_init().
 */
void
ml_cpu_up(void)
{
	return;
}

void
ml_cpu_up_update_counts(__unused int cpu_id)
{
	return;
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	i386_deactivate_cpu();

	return;
}

void
ml_cpu_down_update_counts(__unused int cpu_id)
{
	return;
}

thread_t
current_thread(void)
{
	return current_thread_fast();
}


boolean_t
ml_is64bit(void)
{
	return cpu_mode_is64bit();
}


boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}


boolean_t
ml_state_is64bit(void *saved_state)
{
	return is_saved_state64(saved_state);
}

void
ml_cpu_set_ldt(int selector)
{
	/*
	 * Avoid loading the LDT
	 * if we're setting the KERNEL LDT and it's already set.
	 */
	if (selector == KERNEL_LDT &&
	    current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
		return;
	}

	lldt(selector);
	current_cpu_datap()->cpu_ldt = selector;
}

void
ml_fp_setvalid(boolean_t value)
{
	fp_setvalid(value);
}

uint64_t
ml_cpu_int_event_time(void)
{
	return current_cpu_datap()->cpu_int_event_time;
}

vm_offset_t
ml_stack_remaining(void)
{
	uintptr_t local = (uintptr_t) &local;

	if (ml_at_interrupt_context() != 0) {
		return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
	} else {
		return local - current_thread()->kernel_stack;
	}
}

#if KASAN
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
	if (ml_at_interrupt_context()) {
		return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
	} else {
		return current_thread()->kernel_stack;
	}
}

vm_size_t
ml_stack_size(void)
{
	if (ml_at_interrupt_context()) {
		return INTSTACK_SIZE;
	} else {
		return kernel_stack_size;
	}
}
#endif

#if CONFIG_KCOV
kcov_cpu_data_t *
current_kcov_data(void)
{
	return &current_cpu_datap()->cpu_kcov_data;
}

kcov_cpu_data_t *
cpu_kcov_data(int cpuid)
{
	return &cpu_datap(cpuid)->cpu_kcov_data;
}
#endif /* CONFIG_KCOV */

void
kernel_preempt_check(void)
{
	boolean_t intr;
	unsigned long flags;

	assert(get_preemption_level() == 0);

	if (__improbable(*ast_pending() & AST_URGENT)) {
		/*
		 * can handle interrupts and preemptions
		 * at this point
		 */
		__asm__ volatile ("pushf; pop %0" : "=r" (flags));

		intr = ((flags & EFL_IF) != 0);

		/*
		 * now cause the PRE-EMPTION trap
		 */
		if (intr == TRUE) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	}
}

boolean_t
machine_timeout_suspended(void)
{
	return pmap_tlb_flush_timeout || lck_spinlock_timeout_in_progress ||
	       panic_active() || mp_recent_debugger_activity() ||
	       ml_recent_wake();
}

/* Eagerly evaluate all pending timer and thread callouts
 */
void
ml_timer_evaluate(void)
{
	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);

	uint64_t te_end, te_start = mach_absolute_time();
	simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
	ml_timer_evaluation_in_progress = TRUE;
	thread_call_delayed_timer_rescan_all();
	mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
	ml_timer_evaluation_in_progress = FALSE;
	ml_timer_eager_evaluations++;
	te_end = mach_absolute_time();
	ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
	simple_unlock(&ml_timer_evaluation_slock);

	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

boolean_t
ml_timer_forced_evaluation(void)
{
	return ml_timer_evaluation_in_progress;
}

void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
	current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
}

uint64_t
ml_gpu_stat(thread_t t)
{
	return t->machine.thread_gpu_ns;
}

int plctrace_enabled = 0;

void
_disable_preemption(void)
{
	disable_preemption_internal();
}

void
_enable_preemption(void)
{
	enable_preemption_internal();
}

void
plctrace_disable(void)
{
	plctrace_enabled = 0;
}

static boolean_t ml_quiescing;

void
ml_set_is_quiescing(boolean_t quiescing)
{
	ml_quiescing = quiescing;
}

boolean_t
ml_is_quiescing(void)
{
	return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
	return 0;
}

void
machine_lockdown(void)
{
	x86_64_protect_data_const();
}

bool
ml_cpu_can_exit(__unused int cpu_id, __unused processor_reason_t reason)
{
	return true;
}

void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_end_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_begin_loop(void)
{
}

void
ml_cpu_end_loop(void)
{
}

size_t
ml_get_vm_reserved_regions(bool vm_is64bit, const struct vm_reserved_region **regions)
{
#pragma unused(vm_is64bit)
	assert(regions != NULL);

	*regions = NULL;
	return 0;
}

void
ml_cpu_power_enable(__unused int cpu_id)
{
}

void
ml_cpu_power_disable(__unused int cpu_id)
{
}

int
ml_page_protection_type(void)
{
	return 0; // not supported on x86
}

bool
ml_addr_in_non_xnu_stack(__unused uintptr_t addr)
{
	/* There are no non-XNU stacks on x86 systems. */
	return false;
}