/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/machine_routines.h>
#include <i386/io_map_entries.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <mach/processor.h>
#include <kern/processor.h>
#include <kern/machine.h>

#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/policy_internal.h>

#include <prng/random.h>
#include <prng/entropy.h>
#include <i386/machine_cpu.h>
#include <i386/lapic.h>
#include <i386/bit_routines.h>
#include <i386/mp_events.h>
#include <i386/pmCPU.h>
#include <i386/trap.h>
#include <i386/tsc.h>
#include <i386/cpu_threads.h>
#include <i386/proc_reg.h>
#include <mach/vm_param.h>
#include <i386/pmap.h>
#include <i386/pmap_internal.h>
#include <i386/misc_protos.h>
#include <kern/timer_queue.h>
#include <vm/vm_map.h>
#if KPC
#include <kern/kpc.h>
#endif
#include <architecture/i386/pio.h>
#include <i386/cpu_data.h>
#if DEBUG
#define DBG(x...)       kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

extern void wakeup(void *);

uint64_t        LockTimeOut;
uint64_t        TLBTimeOut;
uint64_t        LockTimeOutTSC;
uint32_t        LockTimeOutUsec;
uint64_t        MutexSpin;
uint64_t        low_MutexSpin;
int64_t         high_MutexSpin;
uint64_t        LastDebuggerEntryAllowance;
uint64_t        delay_spin_threshold;

extern uint64_t panic_restart_timeout;

boolean_t virtualized = FALSE;

decl_simple_lock_data(static, ml_timer_evaluation_slock);
uint32_t ml_timer_eager_evaluations;
uint64_t ml_timer_eager_evaluation_max;
static boolean_t ml_timer_evaluation_in_progress = FALSE;

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
static int max_cpus_initialized = 0;
#define MAX_CPUS_SET    0x1
#define MAX_CPUS_WAIT   0x2
/* IO memory map services */

/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO);
}

/* boot memory allocation */
vm_offset_t
ml_static_malloc(
	__unused vm_size_t size)
{
	return (vm_offset_t)NULL;
}


void
ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
{
	*phys_addr = 0;
	*size = 0;
}


vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
#if defined(__x86_64__)
	return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
#else
	return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
#endif
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_SLIDE(vaddr);
}
/*
 * base must be page-aligned, and size must be a multiple of PAGE_SIZE
 */
kern_return_t
ml_static_verify_page_protections(
	uint64_t base, uint64_t size, vm_prot_t prot)
{
	vm_prot_t pageprot;
	uint64_t offset;

	DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot);

	/*
	 * base must be within the static bounds, defined to be:
	 * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr)
	 */
#if DEVELOPMENT || DEBUG || KASAN
	assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr);
#else   /* On release kernels, assume this is a protection mismatch failure. */
	if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) {
		return KERN_FAILURE;
	}
#endif

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) {
			return KERN_FAILURE;
		}
		if ((pageprot & prot) != prot) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_UNSLIDE(vaddr);
}

/*
 * Reclaim memory, by virtual address, that was used in early boot and is no longer
 * needed by the kernel.
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	addr64_t vaddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	vm_size_t map_size;

	assert(vaddr >= VM_MIN_KERNEL_ADDRESS);

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
		map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);

		/* just skip if nothing mapped here */
		if (map_size == 0) {
			vaddr_cur += PAGE_SIZE;
			continue;
		}

		/*
		 * Can't free from the middle of a large page.
		 */
		assert((vaddr_cur & (map_size - 1)) == 0);

		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		assert(ppn != (ppnum_t)NULL);

		pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
		while (map_size > 0) {
			assert(pmap_valid_page(ppn));
			if (IS_MANAGED_PAGE(ppn)) {
				vm_page_create(ppn, (ppn + 1));
				freed_pages++;
			}
			map_size -= PAGE_SIZE;
			vaddr_cur += PAGE_SIZE;
			ppn++;
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	if (vm_page_wire_count_on_boot != 0) {
		assert(vm_page_wire_count_on_boot >= freed_pages);
		vm_page_wire_count_on_boot -= freed_pages;
	}
	vm_page_unlock_queues();

#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}

/* Change page protections for addresses previously loaded by efiboot */
kern_return_t
ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot)
{
	boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE);

	assert(prot & VM_PROT_READ);

	pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro);

	return KERN_SUCCESS;
}

/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(
	vm_offset_t vaddr)
{
	return (vm_offset_t)kvtophys(vaddr);
}

/*
 *	Routine:	ml_nofault_copy
 *	Function:	Perform a physical mode copy if the source and
 *			destination have valid translations in the kernel pmap.
 *			If translations are present, they are assumed to be wired;
 *			i.e. no attempt is made to guarantee that the translations
 *			obtained remain valid for the duration of the copy process.
 */

vm_size_t
ml_nofault_copy(
	vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
	addr64_t cur_phys_dst, cur_phys_src;
	uint32_t count, nbytes = 0;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			break;
		}
		if (!(cur_phys_dst = kvtophys(virtdst))) {
			break;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
			break;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
			count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
		}
		if (count > size) {
			count = (uint32_t)size;
		}

		bcopy_phys(cur_phys_src, cur_phys_dst, count);

		nbytes += count;
		virtsrc += count;
		virtdst += count;
		size -= count;
	}

	return nbytes;
}

/*
 *	Routine:	ml_validate_nofault
 *	Function:	Validate that the address range has valid translations
 *			in the kernel pmap. If translations are present, they are
 *			assumed to be wired; i.e. no attempt is made to guarantee
 *			that the translations persist after the check.
 *	Returns:	TRUE if the range is mapped and will not cause a fault,
 *			FALSE otherwise.
 */

boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
	addr64_t cur_phys_src;
	uint32_t count;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			return FALSE;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_src))) {
			return FALSE;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > size) {
			count = (uint32_t)size;
		}

		virtsrc += count;
		size -= count;
	}

	return TRUE;
}

/* Interrupt handling */

/* Initialize Interrupts */
void
ml_init_interrupt(void)
{
	(void) ml_set_interrupts_enabled(TRUE);
}


/* Get Interrupts Enabled */
boolean_t
ml_get_interrupts_enabled(void)
{
	unsigned long flags;

	__asm__ volatile ("pushf; pop %0": "=r" (flags));
	return (flags & EFL_IF) != 0;
}

/* Set Interrupts Enabled */
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
	unsigned long flags;
	boolean_t istate;

	__asm__ volatile ("pushf; pop %0" : "=r" (flags));

	assert(get_interrupt_level() ? (enable == FALSE) : TRUE);

	istate = ((flags & EFL_IF) != 0);

	if (enable) {
		__asm__ volatile ("sti;nop");

		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	} else {
		if (istate) {
			__asm__ volatile ("cli");
		}
	}

	return istate;
}

/* Early Set Interrupts Enabled */
boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
	if (enable == TRUE) {
		kprintf("Caller attempted to enable interrupts too early in "
		    "kernel startup. Halting.\n");
		hlt();
		/*NOTREACHED*/
	}

	/* On x86, do not allow interrupts to be enabled very early */
	return FALSE;
}

/* Check if running at interrupt context */
boolean_t
ml_at_interrupt_context(void)
{
	return get_interrupt_level() != 0;
}

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = (get_interrupt_level() != 0);
	/* These will be technically inaccurate for interrupts that occur
	 * successively within a single "idle exit" event, but shouldn't
	 * matter statistically.
	 */
	*pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
}


/* Generate a fake interrupt */
__dead2
void
ml_cause_interrupt(void)
{
	panic("ml_cause_interrupt not defined yet on Intel");
}

/*
 * TODO: transition users of this to kernel_thread_start_priority
 * ml_thread_policy is an unsupported KPI
 */
void
ml_thread_policy(
	thread_t thread,
	__unused unsigned policy_id,
	unsigned policy_info)
{
	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
		thread_precedence_policy_data_t info;
		__assert_only kern_return_t kret;

		info.importance = 1;

		kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
		    (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		assert(kret == KERN_SUCCESS);
	}
}

/* Initialize Interrupts */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);

	PE_install_interrupt_handler(nub, source, target,
	    (IOInterruptHandler) handler, refCon);

	(void) ml_set_interrupts_enabled(current_state);
}


void
machine_signal_idle(
	processor_t processor)
{
	cpu_interrupt(processor->cpu_id);
}

__dead2
void
machine_signal_idle_deferred(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

__dead2
void
machine_signal_idle_cancel(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

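/*
 * Allocate and wire up per-CPU data for a newly discovered CPU: map its
 * local APIC ID to a logical cpu number, record the physical (lapic) id,
 * register the CPU with KPC when configured, and allocate topology (core)
 * structures for non-boot CPUs. Returns KERN_FAILURE if any allocation or
 * registration fails.
 */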
static kern_return_t
register_cpu(
	uint32_t        lapic_id,
	processor_t     *processor_out,
	boolean_t       boot_cpu )
{
	int             target_cpu;
	cpu_data_t      *this_cpu_datap;

	this_cpu_datap = cpu_data_alloc(boot_cpu);
	if (this_cpu_datap == NULL) {
		return KERN_FAILURE;
	}
	target_cpu = this_cpu_datap->cpu_number;
	assert((boot_cpu && (target_cpu == 0)) ||
	    (!boot_cpu && (target_cpu != 0)));

	lapic_cpu_map(lapic_id, target_cpu);

	/* The cpu_id is not known at the registration phase. Just do
	 * lapic_id for now
	 */
	this_cpu_datap->cpu_phys_number = lapic_id;

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto failed;
	}
#endif

	if (!boot_cpu) {
		cpu_thread_alloc(this_cpu_datap->cpu_number);
		if (this_cpu_datap->lcpu.core == NULL) {
			goto failed;
		}
	}

	/*
	 * processor_init() deferred to topology start
	 * because "slot numbers" a.k.a. logical processor numbers
	 * are not yet finalized.
	 */
	*processor_out = this_cpu_datap->cpu_processor;

	return KERN_SUCCESS;

failed:
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */

	return KERN_FAILURE;
}

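/*
 * Called first with start == FALSE to register each CPU by local APIC ID,
 * then with start == TRUE to sort the topology, assign the final cpu_id,
 * initialize the per-CPU call queues and random state for non-boot CPUs,
 * and start the CPU.
 */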
kern_return_t
ml_processor_register(
	cpu_id_t        cpu_id,
	uint32_t        lapic_id,
	processor_t     *processor_out,
	boolean_t       boot_cpu,
	boolean_t       start )
{
	static boolean_t done_topo_sort = FALSE;
	static uint32_t num_registered = 0;

	/* Register all CPUs first, and track max */
	if (start == FALSE) {
		num_registered++;

		DBG( "registering CPU lapic id %d\n", lapic_id );

		return register_cpu( lapic_id, processor_out, boot_cpu );
	}

	/* Sort by topology before we start anything */
	if (!done_topo_sort) {
		DBG( "about to start CPUs. %d registered\n", num_registered );

		cpu_topology_sort( num_registered );
		done_topo_sort = TRUE;
	}

	/* Assign the cpu ID */
	uint32_t cpunum = -1;
	cpu_data_t      *this_cpu_datap = NULL;

	/* find cpu num and pointer */
	cpunum = ml_get_cpuid( lapic_id );

	if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
		panic( "trying to start invalid/unregistered CPU %d", lapic_id );
	}

	this_cpu_datap = cpu_datap(cpunum);

	/* fix the CPU id */
	this_cpu_datap->cpu_id = cpu_id;

	/* allocate and initialize other per-cpu structures */
	if (!boot_cpu) {
		mp_cpus_call_cpu_init(cpunum);
		random_cpu_init(cpunum);
	}

	/* output arg */
	*processor_out = this_cpu_datap->cpu_processor;

	/* OK, try and start this CPU */
	return cpu_topology_start_cpu( cpunum );
}


void
ml_cpu_get_info_type(ml_cpu_info_t *cpu_infop, cluster_type_t cluster_type __unused)
{
	boolean_t       os_supports_sse;
	i386_cpu_info_t *cpuid_infop;

	if (cpu_infop == NULL) {
		return;
	}

	/*
	 * Are we supporting MMX/SSE/SSE2/SSE3?
	 * As distinct from whether the cpu has these capabilities.
	 */
	os_supports_sse = !!(get_cr4() & CR4_OSXMM);

	if (ml_fpu_avx_enabled()) {
		cpu_infop->vector_unit = 9;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
		cpu_infop->vector_unit = 8;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
		cpu_infop->vector_unit = 7;
	} else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 6;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 5;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
		cpu_infop->vector_unit = 4;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
		cpu_infop->vector_unit = 3;
	} else if (cpuid_features() & CPUID_FEATURE_MMX) {
		cpu_infop->vector_unit = 2;
	} else {
		cpu_infop->vector_unit = 0;
	}

	cpuid_infop = cpuid_info();

	cpu_infop->cache_line_size = cpuid_infop->cache_linesize;

	cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
	cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];

	if (cpuid_infop->cache_size[L2U] > 0) {
		cpu_infop->l2_settings = 1;
		cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
	} else {
		cpu_infop->l2_settings = 0;
		cpu_infop->l2_cache_size = 0xFFFFFFFF;
	}

	if (cpuid_infop->cache_size[L3U] > 0) {
		cpu_infop->l3_settings = 1;
		cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
	} else {
		cpu_infop->l3_settings = 0;
		cpu_infop->l3_cache_size = 0xFFFFFFFF;
	}
}

/*
 *	Routine:	ml_cpu_get_info
 *	Function:	Fill out the ml_cpu_info_t structure with parameters associated
 *			with the boot cluster.
 */
void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
{
	ml_cpu_get_info_type(ml_cpu_info, CLUSTER_TYPE_SMP);
}

unsigned int
ml_get_cpu_number_type(cluster_type_t cluster_type __unused, bool logical, bool available)
{
	/*
	 * At present no supported x86 system features more than 1 CPU type.
	 * Because of this, the cluster_type parameter is ignored.
	 */
	if (logical && available) {
		return machine_info.logical_cpu;
	} else if (logical && !available) {
		return machine_info.logical_cpu_max;
	} else if (!logical && available) {
		return machine_info.physical_cpu;
	} else {
		return machine_info.physical_cpu_max;
	}
}

unsigned int
ml_get_cpu_types(void)
{
	return 1 << CLUSTER_TYPE_SMP;
}

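/*
 * Return the highest CPU number that may come up, for use early in boot:
 * max_ncpus, bounded by the firmware-reported CPU count when one is known.
 */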
int
ml_early_cpu_max_number(void)
{
	int n = max_ncpus;

	assert(startup_phase >= STARTUP_SUB_TUNABLES);
	if (max_cpus_from_firmware) {
		n = MIN(n, max_cpus_from_firmware);
	}
	return n - 1;
}

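/*
 * Record the number of enabled processors reported by ACPI, capped at the
 * kernel's max_ncpus, and wake any thread blocked in ml_wait_max_cpus().
 */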
void
ml_set_max_cpus(unsigned int max_cpus)
{
	lck_mtx_lock(&max_cpus_lock);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
			/*
			 * Note: max_cpus is the number of enabled processors
			 * that ACPI found; max_ncpus is the maximum number
			 * that the kernel supports or that the "cpus="
			 * boot-arg has set. Here we take the minimum.
			 */
			machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
		}
		if (max_cpus_initialized == MAX_CPUS_WAIT) {
			thread_wakeup((event_t) &max_cpus_initialized);
		}
		max_cpus_initialized = MAX_CPUS_SET;
	}
	lck_mtx_unlock(&max_cpus_lock);
}

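/*
 * Block until ml_set_max_cpus() has run, then return the final
 * machine_info.max_cpus value.
 */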
unsigned int
ml_wait_max_cpus(void)
{
	lck_mtx_lock(&max_cpus_lock);
	while (max_cpus_initialized != MAX_CPUS_SET) {
		max_cpus_initialized = MAX_CPUS_WAIT;
		lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
	}
	lck_mtx_unlock(&max_cpus_lock);
	return machine_info.max_cpus;
}

void
ml_panic_trap_to_debugger(__unused const char *panic_format_str,
    __unused va_list *panic_args,
    __unused unsigned int reason,
    __unused void *ctx,
    __unused uint64_t panic_options_mask,
    __unused unsigned long panic_caller)
{
	return;
}

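/*
 * Inflate a timeout by left-shifting it vti bits, saturating at max_timeout
 * if the shift count is too large or the shifted value would overflow.
 */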
static uint64_t
virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
	if (vti >= 64) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	if ((timeout << vti) > max_timeout) {
		return max_timeout;
	}

	return timeout << vti;
}

static uint32_t
virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout)
{
	if (vti >= 32) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	return timeout << vti;
}

/*
 * Some timeouts are later adjusted or used in calculations setting
 * other values. In order to avoid overflow, cap the max timeout as
 * 2^47ns (~39 hours).
 */
static const uint64_t max_timeout_ns = 1ULL << 47;

/*
 * Inflate a timeout in absolutetime.
 */
static uint64_t
virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout)
{
	uint64_t max_timeout;
	nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a value in TSC ticks.
 */
static uint64_t
virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout)
{
	const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a timeout in microseconds.
 */
static uint32_t
virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout)
{
	const uint32_t max_timeout = ~0;
	return virtual_timeout_inflate32(vti, timeout, max_timeout);
}

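/* Use the raw TSC value as a source of timing entropy. */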
uint64_t
ml_get_timebase_entropy(void)
{
	return __builtin_ia32_rdtsc();
}

/*
 *	Routine:	ml_init_lock_timeout
 *	Function:	Initialize lock, mutex-spin and TLB-flush timeout values,
 *			inflating them when running virtualized.
 */
void
ml_init_lock_timeout(void)
{
	uint64_t        abstime;
	uint32_t        mtxspin;
#if DEVELOPMENT || DEBUG
	uint64_t        default_timeout_ns = NSEC_PER_SEC >> 2;
#else
	uint64_t        default_timeout_ns = NSEC_PER_SEC >> 1;
#endif
	uint32_t        slto;
	uint32_t        prt;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	/*
	 * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks,
	 * and LockTimeOutUsec is in microseconds and it's 32-bits.
	 */
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOut = abstime;
	LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);

	/*
	 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
	 * LockTimeOut but can be overridden separately. In particular, a
	 * zero value inhibits the timeout-panic and cuts a trace event instead
	 * - see pmap_flush_tlbs().
	 */
	if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		TLBTimeOut = (uint32_t) abstime;
	} else {
		TLBTimeOut = LockTimeOut;
	}

#if DEVELOPMENT || DEBUG
	report_phy_read_delay = LockTimeOut >> 1;
#endif
	if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		report_phy_read_delay = abstime;
	}

	if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		report_phy_write_delay = abstime;
	}

	if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		trace_phy_read_delay = abstime;
	}

	if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		trace_phy_write_delay = abstime;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = (unsigned int)abstime;
	low_MutexSpin = MutexSpin;
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 */
	high_MutexSpin = -1;

	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
	}

	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
	if (virtualized) {
		unsigned int vti;

		if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
			vti = 6;
		}
		printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
		kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_abs(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_tsc(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END
#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%08x ", #_timeout, _timeout);			\
	_timeout = virtual_timeout_inflate_us(vti, _timeout);		\
	kprintf("-> 0x%08x\n", _timeout);				\
MACRO_END
		/*
		 * These timeout values are inflated because they cause
		 * the kernel to panic when they expire.
		 * (Needed when running as a guest VM as the host OS
		 * may not always schedule vcpu threads in time to
		 * meet the deadline implied by the narrower time
		 * window used on hardware.)
		 */
		VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec);
		VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC);
		VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_ABS(report_phy_read_delay);
		VIRTUAL_TIMEOUT_INFLATE_TSC(lock_panic_timeout);
	}

	interrupt_latency_tracker_setup();
	simple_lock_init(&ml_timer_evaluation_slock, 0);
}

/*
 * Threshold above which we should attempt to block
 * instead of spinning for clock_delay_until().
 */

void
ml_init_delay_spin_threshold(int threshold_us)
{
	nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}

boolean_t
ml_delay_should_spin(uint64_t interval)
{
	return (interval < delay_spin_threshold) ? TRUE : FALSE;
}

TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates. Defer to cpu_thread_init().
 */
void
ml_cpu_up(void)
{
	return;
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	i386_deactivate_cpu();

	return;
}

thread_t
current_thread(void)
{
	return current_thread_fast();
}


boolean_t
ml_is64bit(void)
{
	return cpu_mode_is64bit();
}


boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}


boolean_t
ml_state_is64bit(void *saved_state)
{
	return is_saved_state64(saved_state);
}

void
ml_cpu_set_ldt(int selector)
{
	/*
	 * Avoid loading the LDT
	 * if we're setting the KERNEL LDT and it's already set.
	 */
	if (selector == KERNEL_LDT &&
	    current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
		return;
	}

	lldt(selector);
	current_cpu_datap()->cpu_ldt = selector;
}

void
ml_fp_setvalid(boolean_t value)
{
	fp_setvalid(value);
}

uint64_t
ml_cpu_int_event_time(void)
{
	return current_cpu_datap()->cpu_int_event_time;
}

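/*
 * Return an estimate of the bytes remaining on the current stack (interrupt
 * or kernel thread stack), based on the address of a local variable.
 */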
vm_offset_t
ml_stack_remaining(void)
{
	uintptr_t local = (uintptr_t) &local;

	if (ml_at_interrupt_context() != 0) {
		return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
	} else {
		return local - current_thread()->kernel_stack;
	}
}

#if KASAN
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
	if (ml_at_interrupt_context()) {
		return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
	} else {
		return current_thread()->kernel_stack;
	}
}

vm_size_t
ml_stack_size(void)
{
	if (ml_at_interrupt_context()) {
		return INTSTACK_SIZE;
	} else {
		return kernel_stack_size;
	}
}
#endif

#if CONFIG_KCOV
kcov_cpu_data_t *
current_kcov_data(void)
{
	return &current_cpu_datap()->cpu_kcov_data;
}

kcov_cpu_data_t *
cpu_kcov_data(int cpuid)
{
	return &cpu_datap(cpuid)->cpu_kcov_data;
}
#endif /* CONFIG_KCOV */

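/*
 * If an urgent AST is pending and interrupts are enabled, take the
 * preemption trap now. Must be called with preemption enabled.
 */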
void
kernel_preempt_check(void)
{
	boolean_t       intr;
	unsigned long flags;

	assert(get_preemption_level() == 0);

	if (__improbable(*ast_pending() & AST_URGENT)) {
		/*
		 * can handle interrupts and preemptions
		 * at this point
		 */
		__asm__ volatile ("pushf; pop %0" : "=r" (flags));

		intr = ((flags & EFL_IF) != 0);

		/*
		 * now cause the PRE-EMPTION trap
		 */
		if (intr == TRUE) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	}
}

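/*
 * Report whether watchdog-style timeouts should currently be ignored:
 * during a TLB flush timeout, a spinlock timeout, an active panic,
 * recent debugger activity, or shortly after wake.
 */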
boolean_t
machine_timeout_suspended(void)
{
	return pmap_tlb_flush_timeout || lck_spinlock_timeout_in_progress ||
	       panic_active() || mp_recent_debugger_activity() ||
	       ml_recent_wake();
}

/* Eagerly evaluate all pending timer and thread callouts
 */
void
ml_timer_evaluate(void)
{
	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);

	uint64_t te_end, te_start = mach_absolute_time();
	simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
	ml_timer_evaluation_in_progress = TRUE;
	thread_call_delayed_timer_rescan_all();
	mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
	ml_timer_evaluation_in_progress = FALSE;
	ml_timer_eager_evaluations++;
	te_end = mach_absolute_time();
	ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
	simple_unlock(&ml_timer_evaluation_slock);

	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

boolean_t
ml_timer_forced_evaluation(void)
{
	return ml_timer_evaluation_in_progress;
}

uint64_t
ml_energy_stat(__unused thread_t t)
{
	return 0;
}

void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
	current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
}

uint64_t
ml_gpu_stat(thread_t t)
{
	return t->machine.thread_gpu_ns;
}

int plctrace_enabled = 0;

void
_disable_preemption(void)
{
	disable_preemption_internal();
}

void
_enable_preemption(void)
{
	enable_preemption_internal();
}

void
plctrace_disable(void)
{
	plctrace_enabled = 0;
}

static boolean_t ml_quiescing;

void
ml_set_is_quiescing(boolean_t quiescing)
{
	ml_quiescing = quiescing;
}

boolean_t
ml_is_quiescing(void)
{
	return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
	return 0;
}

void
machine_lockdown(void)
{
	x86_64_protect_data_const();
}

bool
ml_cpu_can_exit(__unused int cpu_id)
{
	return true;
}

void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_end_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_begin_loop(void)
{
}

void
ml_cpu_end_loop(void)
{
}

size_t
ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
{
#pragma unused(vm_is64bit)
	assert(regions != NULL);

	*regions = NULL;
	return 0;
}