/* * Copyright (c) 2000-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HYPERVISOR #include #endif #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ [(sizeof(_type_) % 16) == 0 ? 1 : -1] /* Compile-time checks for vital save area sizing: */ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) extern zone_t iss_zone; /* zone for saved_state area */ extern zone_t ids_zone; /* zone for debug_state area */ extern int tecs_mode_supported; extern boolean_t cpuid_tsx_supported; bool lbr_need_tsx_workaround = false; int force_thread_policy_tecs; struct lbr_group { uint32_t msr_from; uint32_t msr_to; uint32_t msr_info; }; struct cpu_lbrs { uint32_t lbr_count; struct lbr_group msr_lbrs[X86_MAX_LBRS]; }; const struct cpu_lbrs *cpu_lbr_setp = NULL; int cpu_lbr_type; const struct cpu_lbrs nhm_cpu_lbrs = { 16 /* LBR count */, { { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ }, { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ }, { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ }, { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ }, { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ }, { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ }, { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ }, { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ }, { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ }, { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ }, { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ }, { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ }, { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ }, { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ }, { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ }, { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ } } }, skl_cpu_lbrs = { 32 /* LBR count */, { { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ }, { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ }, { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ }, { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ }, { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ }, { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ }, { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ }, { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ }, { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ }, { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ }, { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ }, { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ }, { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ }, { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ }, { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ }, { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ }, { 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ }, { 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ }, { 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ }, { 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ }, { 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ }, { 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ }, { 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ }, { 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ }, { 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ }, { 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ }, { 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ }, { 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ }, { 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ }, { 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ }, { 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ }, { 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ } } }; void i386_lbr_disable(void) { /* Enable LBRs */ wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) & ~DEBUGCTL_LBR_ENA); } /* * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic * because the shadow map is not been initialized when these functions are called in * early boot. */ void __attribute__((no_sanitize("address"))) i386_lbr_enable(void) { /* last_branch_kmode_only_enabled controls LBR data collection for core files and paniclogs */ switch (last_branch_enabled_modes) { case LBR_ENABLED_USERMODE: case LBR_ENABLED_KERNELMODE: /* Enable LBRs */ wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA); break; case LBR_ENABLED_NONE: case LBR_ENABLED_ALLMODES: default: break; } } void __attribute__((no_sanitize("address"))) i386_lbr_init(i386_cpu_info_t *info_p, bool is_master) { if (last_branch_enabled_modes == LBR_ENABLED_NONE) { i386_lbr_disable(); return; } if (last_branch_enabled_modes == LBR_ENABLED_ALLMODES) { panic("Collecting LBR data from both user and kernel mode is not supported."); } if (is_master) { /* All NHM+ CPUs support PERF_CAPABILITIES, so no need to check cpuid for its presence */ cpu_lbr_type = PERFCAP_LBR_TYPE(rdmsr64(MSR_IA32_PERF_CAPABILITIES)); /* Sanity-check the LBR type -- some VMMs do not properly support it */ if (cpu_lbr_type < PERFCAP_LBR_TYPE_MISPRED || cpu_lbr_type > PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO) { kprintf("CPU-reported LBR type is invalid or is not supported (%d)." " Disabling LBR support.\n", cpu_lbr_type); last_branch_enabled_modes = LBR_ENABLED_NONE; i386_lbr_disable(); return; } switch (info_p->cpuid_cpufamily) { case CPUFAMILY_INTEL_NEHALEM: case CPUFAMILY_INTEL_WESTMERE: /* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */ cpu_lbr_setp = &nhm_cpu_lbrs; break; case CPUFAMILY_INTEL_SANDYBRIDGE: case CPUFAMILY_INTEL_IVYBRIDGE: /* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */ cpu_lbr_setp = &nhm_cpu_lbrs; break; case CPUFAMILY_INTEL_HASWELL: case CPUFAMILY_INTEL_BROADWELL: lbr_need_tsx_workaround = cpuid_tsx_supported ? false : true; cpu_lbr_setp = &nhm_cpu_lbrs; break; case CPUFAMILY_INTEL_SKYLAKE: case CPUFAMILY_INTEL_KABYLAKE: case CPUFAMILY_INTEL_ICELAKE: case CPUFAMILY_INTEL_COMETLAKE: cpu_lbr_setp = &skl_cpu_lbrs; break; default: panic("Unknown CPU family"); } if (last_branch_enabled_modes == LBR_ENABLED_KERNELMODE) { /* This depends on cpu_lbr_setp being setup first */ lbr_for_kmode_init(cpu_lbr_setp->lbr_count); } } /* Configure LBR_SELECT for CPL > 0 records only or CPL = 0 for use in panic logs and core files */ switch (last_branch_enabled_modes) { case LBR_ENABLED_USERMODE: wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_EQ_0); break; case LBR_ENABLED_KERNELMODE: #if DEBUG || DEVELOPMENT wrmsr64(MSR_IA32_LBR_SELECT, 0); #else wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_NEQ_0); #endif break; case LBR_ENABLED_NONE: case LBR_ENABLED_ALLMODES: default: break; } /* Enable LBRs */ wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA); } static uint64_t lbr_mode_based_filter(uint64_t record, __unused boolean_t from_userspace) { uint64_t filtered_record; #define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ ) #define LBR_SENTINEL_USER_MODE (0x757365726C616E64ULL /* "userland" */ ) switch (last_branch_enabled_modes) { case LBR_ENABLED_USERMODE: filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record; break; case LBR_ENABLED_KERNELMODE: /* For internal builds don't filter out userspace addresses from panic logs and core files. */ #if DEBUG || DEVELOPMENT filtered_record = record; #else /* If coming from user space use the correct filter in release builds * When LBRs are enabled for kernel mode and user space requests LBR data: remove kernel addresses * " " and kernel mode requests LBR data: remove usermode addresses */ if (from_userspace) { filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record; } else { filtered_record = (VM_KERNEL_ADDRESS(record)) ? record : LBR_SENTINEL_USER_MODE; } #endif break; case LBR_ENABLED_ALLMODES: case LBR_ENABLED_NONE: default: /* Set LBR to 0 for unsupported use cases */ filtered_record = 0x0; break; } return filtered_record; } static int i386_lbr_native_state_to_mach_thread_state(pcb_t pcb, last_branch_state_t *machlbrp, boolean_t from_userspace) { int last_entry; int i, j, lbr_tos; uint64_t from_rip, to_rip; machlbrp->lbr_count = cpu_lbr_setp->lbr_count; lbr_tos = pcb->lbrs.lbr_tos & (X86_MAX_LBRS - 1); last_entry = (lbr_tos == (cpu_lbr_setp->lbr_count - 1)) ? 0 : (lbr_tos + 1); switch (cpu_lbr_type) { case PERFCAP_LBR_TYPE_MISPRED: /* NHM */ machlbrp->lbr_supported_tsx = 0; machlbrp->lbr_supported_cycle_count = 0; for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { to_rip = pcb->lbrs.lbrs[i].to_rip; machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace); from_rip = LBR_TYPE_MISPRED_FROMRIP(pcb->lbrs.lbrs[i].from_rip); machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace); machlbrp->lbrs[j].mispredict = LBR_TYPE_MISPRED_MISPREDICT(pcb->lbrs.lbrs[i].from_rip); machlbrp->lbrs[j].tsx_abort = machlbrp->lbrs[j].in_tsx = 0; /* Not Supported */ if (i == last_entry) { break; } } break; case PERFCAP_LBR_TYPE_TSXINFO: /* HSW/BDW */ machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0; machlbrp->lbr_supported_cycle_count = 0; for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { to_rip = pcb->lbrs.lbrs[i].to_rip; machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace); from_rip = LBR_TYPE_TSXINFO_FROMRIP(pcb->lbrs.lbrs[i].from_rip); machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace); machlbrp->lbrs[j].mispredict = LBR_TYPE_TSXINFO_MISPREDICT(pcb->lbrs.lbrs[i].from_rip); if (cpuid_tsx_supported) { machlbrp->lbrs[j].tsx_abort = LBR_TYPE_TSXINFO_TSX_ABORT(pcb->lbrs.lbrs[i].from_rip); machlbrp->lbrs[j].in_tsx = LBR_TYPE_TSXINFO_IN_TSX(pcb->lbrs.lbrs[i].from_rip); } else { machlbrp->lbrs[j].tsx_abort = 0; machlbrp->lbrs[j].in_tsx = 0; } if (i == last_entry) { break; } } break; case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO: /* SKL+ */ machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0; machlbrp->lbr_supported_cycle_count = 1; for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) { from_rip = pcb->lbrs.lbrs[i].from_rip; machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace); to_rip = pcb->lbrs.lbrs[i].to_rip; machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace); machlbrp->lbrs[j].mispredict = LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb->lbrs.lbrs[i].info); machlbrp->lbrs[j].tsx_abort = LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb->lbrs.lbrs[i].info); machlbrp->lbrs[j].in_tsx = LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb->lbrs.lbrs[i].info); machlbrp->lbrs[j].cycle_count = LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb->lbrs.lbrs[i].info); if (i == last_entry) { break; } } break; default: #if DEBUG || DEVELOPMENT /* This should be impossible, based on the filtering we do in i386_lbr_init() */ panic("Unknown LBR format: %d!", cpu_lbr_type); /*NOTREACHED*/ #else return -1; #endif } return 0; } int i386_filtered_lbr_state_to_mach_thread_state(thread_t thr_act, last_branch_state_t *machlbrp, boolean_t from_userspace) { boolean_t istate; istate = ml_set_interrupts_enabled(FALSE); /* If the current thread is asking for its own LBR data, synch the LBRs first */ if (thr_act == current_thread()) { i386_lbr_synch(thr_act); } ml_set_interrupts_enabled(istate); return i386_lbr_native_state_to_mach_thread_state(THREAD_TO_PCB(thr_act), machlbrp, from_userspace); } void i386_lbr_synch(thread_t thr) { pcb_t old_pcb = THREAD_TO_PCB(thr); int i; /* First, save current LBRs to the old thread's PCB */ if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) { for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from); old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to); old_pcb->lbrs.lbrs[i].info = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info); } } else { for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from); old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to); } } /* Finally, save the TOS */ old_pcb->lbrs.lbr_tos = rdmsr64(MSR_IA32_LASTBRANCH_TOS); } void i386_switch_lbrs(thread_t old, thread_t new) { pcb_t new_pcb; int i; bool save_old = (old != NULL && get_threadtask(old) != kernel_task); bool restore_new = (get_threadtask(new) != kernel_task); if (!save_old && !restore_new) { return; } assert(cpu_lbr_setp != NULL); new_pcb = THREAD_TO_PCB(new); i386_lbr_disable(); if (save_old) { i386_lbr_synch(old); } if (restore_new) { /* Now restore the new threads's LBRs */ if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) { for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip); wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip); wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info, new_pcb->lbrs.lbrs[i].info); } } else { if (lbr_need_tsx_workaround) { for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { /* * If TSX has been disabled, the hardware expects those two bits to be sign * extensions of bit 47 (even though it didn't return them that way via the rdmsr!) */ #define BIT_47 (1ULL << 47) wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip | ((new_pcb->lbrs.lbrs[i].from_rip & BIT_47) ? 0x6000000000000000ULL : 0)); wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip | ((new_pcb->lbrs.lbrs[i].to_rip & BIT_47) ? 0x6000000000000000ULL : 0)); } } else { for (i = 0; i < cpu_lbr_setp->lbr_count; i++) { wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip); wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip); } } } /* Lastly, restore the new threads's TOS */ wrmsr64(MSR_IA32_LASTBRANCH_TOS, new_pcb->lbrs.lbr_tos); } i386_lbr_enable(); } void act_machine_switch_pcb(thread_t old, thread_t new) { pcb_t pcb = THREAD_TO_PCB(new); cpu_data_t *cdp = current_cpu_datap(); struct real_descriptor *ldtp; mach_vm_offset_t pcb_stack_top; assert(new->kernel_stack != 0); assert(ml_get_interrupts_enabled() == FALSE); #ifdef DIRECTION_FLAG_DEBUG if (x86_get_flags() & EFL_DF) { panic("Direction flag detected: 0x%lx", x86_get_flags()); } #endif /* * Clear segment state * unconditionally for DS/ES/FS but more carefully for GS whose * cached state we track. */ set_ds(NULL_SEG); set_es(NULL_SEG); set_fs(NULL_SEG); if (get_gs() != NULL_SEG) { swapgs(); /* switch to user's GS context */ set_gs(NULL_SEG); swapgs(); /* and back to kernel */ /* record the active machine state lost */ cdp->cpu_uber.cu_user_gs_base = 0; } vm_offset_t isf; /* * Set pointer to PCB's interrupt stack frame in cpu data. * Used by syscall and double-fault trap handlers. */ isf = (vm_offset_t) &pcb->iss->ss_64.isf; cdp->cpu_uber.cu_isf = isf; pcb_stack_top = (vm_offset_t) (pcb->iss + 1); /* require 16-byte alignment */ assert((pcb_stack_top & 0xF) == 0); current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku; /* * Top of temporary sysenter stack points to pcb stack. * Although this is not normally used by 64-bit users, * it needs to be set in case a sysenter is attempted. */ *current_sstk64() = pcb_stack_top; cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku; if (is_saved_state64(pcb->iss)) { cdp->cpu_task_map = new->map->pmap->pm_task_map; /* * Enable the 64-bit user code segment, USER64_CS. * Disable the 32-bit user code segment, USER_CS. */ gdt_desc_p(USER64_CS)->access |= ACC_PL_U; gdt_desc_p(USER_CS)->access &= ~ACC_PL_U; /* * Switch user's GS base if necessary * by setting the Kernel's GS base MSR * - this will become the user's on the swapgs when * returning to user-space. Avoid this for * kernel threads (no user TLS support required) * and verify the memory shadow of the segment base * in the event it was altered in user space. */ if ((pcb->cthread_self != 0) || (get_threadtask(new) != kernel_task)) { if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); } } } else { cdp->cpu_task_map = TASK_MAP_32BIT; /* * Disable USER64_CS * Enable USER_CS */ /* It's possible that writing to the GDT areas * is expensive, if the processor intercepts those * writes to invalidate its internal segment caches * TODO: perhaps only do this if switching bitness */ gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U; gdt_desc_p(USER_CS)->access |= ACC_PL_U; /* * Set the thread`s cthread (a.k.a pthread) * For 32-bit user this involves setting the USER_CTHREAD * descriptor in the LDT to point to the cthread data. * The involves copying in the pre-initialized descriptor. */ ldtp = current_ldt(); ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; if (pcb->uldt_selector != 0) { ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; } cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; } cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk; if (last_branch_enabled_modes == LBR_ENABLED_USERMODE) { i386_switch_lbrs(old, new); } /* * Set the thread's LDT or LDT entry. */ task_t task = get_threadtask_early(new); if (__probable(task == TASK_NULL || task->i386_ldt == 0)) { /* * Use system LDT. */ ml_cpu_set_ldt(KERNEL_LDT); cdp->cpu_curtask_has_ldt = 0; } else { /* * Task has its own LDT. */ user_ldt_set(new); cdp->cpu_curtask_has_ldt = 1; } } kern_return_t thread_set_wq_state32(thread_t thread, thread_state_t tstate) { x86_thread_state32_t *state; x86_saved_state32_t *saved_state; thread_t curth = current_thread(); spl_t s = 0; pal_register_cache_state(thread, DIRTY); saved_state = USER_REGS32(thread); state = (x86_thread_state32_t *)tstate; if (curth != thread) { s = splsched(); thread_lock(thread); } saved_state->ebp = 0; saved_state->eip = state->eip; saved_state->eax = state->eax; saved_state->ebx = state->ebx; saved_state->ecx = state->ecx; saved_state->edx = state->edx; saved_state->edi = state->edi; saved_state->esi = state->esi; saved_state->uesp = state->esp; saved_state->efl = EFL_USER_SET; saved_state->cs = USER_CS; saved_state->ss = USER_DS; saved_state->ds = USER_DS; saved_state->es = USER_DS; if (curth != thread) { thread_unlock(thread); splx(s); } return KERN_SUCCESS; } kern_return_t thread_set_wq_state64(thread_t thread, thread_state_t tstate) { x86_thread_state64_t *state; x86_saved_state64_t *saved_state; thread_t curth = current_thread(); spl_t s = 0; saved_state = USER_REGS64(thread); state = (x86_thread_state64_t *)tstate; /* Disallow setting non-canonical PC or stack */ if (!IS_USERADDR64_CANONICAL(state->rsp) || !IS_USERADDR64_CANONICAL(state->rip)) { return KERN_FAILURE; } pal_register_cache_state(thread, DIRTY); if (curth != thread) { s = splsched(); thread_lock(thread); } saved_state->rbp = 0; saved_state->rdi = state->rdi; saved_state->rsi = state->rsi; saved_state->rdx = state->rdx; saved_state->rcx = state->rcx; saved_state->r8 = state->r8; saved_state->r9 = state->r9; saved_state->isf.rip = state->rip; saved_state->isf.rsp = state->rsp; saved_state->isf.cs = USER64_CS; saved_state->isf.rflags = EFL_USER_SET; if (curth != thread) { thread_unlock(thread); splx(s); } return KERN_SUCCESS; } /* * Initialize the machine-dependent state for a new thread. */ void machine_thread_create( thread_t thread, task_t task, bool first_thread __unused) { pcb_t pcb = THREAD_TO_PCB(thread); if ((task->t_flags & TF_TECS) || __improbable(force_thread_policy_tecs)) { thread->machine.mthr_do_segchk = MTHR_SEGCHK; } else { thread->machine.mthr_do_segchk = 0; } if (task != kernel_task && __improbable((cpuid_wa_required(CPU_INTEL_RSBST) & CWA_ON) != 0)) { thread->machine.mthr_do_segchk |= MTHR_RSBST; } /* * Allocate save frame only if required. */ if (pcb->iss == NULL) { assert((get_preemption_level() == 0)); pcb->iss = zalloc_flags(iss_zone, Z_WAITOK | Z_NOFAIL); } /* * Ensure that the synthesized 32-bit state including * the 64-bit interrupt state can be acommodated in the * 64-bit state we allocate for both 32-bit and 64-bit threads. */ assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <= sizeof(pcb->iss->ss_64)); bzero((char *)pcb->iss, sizeof(x86_saved_state_t)); bzero(&pcb->lbrs, sizeof(x86_lbrs_t)); if (task_has_64Bit_addr(task)) { pcb->iss->flavor = x86_SAVED_STATE64; pcb->iss->ss_64.isf.cs = USER64_CS; pcb->iss->ss_64.isf.ss = USER_DS; pcb->iss->ss_64.fs = USER_DS; pcb->iss->ss_64.gs = USER_DS; pcb->iss->ss_64.isf.rflags = EFL_USER_SET; } else { pcb->iss->flavor = x86_SAVED_STATE32; pcb->iss->ss_32.cs = USER_CS; pcb->iss->ss_32.ss = USER_DS; pcb->iss->ss_32.ds = USER_DS; pcb->iss->ss_32.es = USER_DS; pcb->iss->ss_32.fs = USER_DS; pcb->iss->ss_32.gs = USER_DS; pcb->iss->ss_32.efl = EFL_USER_SET; } simple_lock_init(&pcb->lock, 0); pcb->cthread_self = 0; pcb->uldt_selector = 0; pcb->thread_gpu_ns = 0; /* Ensure that the "cthread" descriptor describes a valid * segment. */ if ((pcb->cthread_desc.access & ACC_P) == 0) { pcb->cthread_desc = *gdt_desc_p(USER_DS); } pcb->insn_state_copyin_failure_errorcode = 0; if (pcb->insn_state != 0) { /* Reinit for new thread */ bzero(pcb->insn_state, sizeof(x86_instruction_state_t)); pcb->insn_state->insn_stream_valid_bytes = -1; } pcb->insn_copy_optout = (task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false; } /* * Machine-dependent cleanup prior to destroying a thread */ void machine_thread_destroy( thread_t thread) { pcb_t pcb = THREAD_TO_PCB(thread); #if HYPERVISOR if (thread->hv_thread_target) { hv_callbacks.thread_destroy(thread->hv_thread_target); thread->hv_thread_target = NULL; } #endif if (pcb->ifps != 0) { fpu_free(thread, pcb->ifps); } if (pcb->iss != 0) { zfree(iss_zone, pcb->iss); pcb->iss = 0; } if (pcb->ids) { zfree(ids_zone, pcb->ids); pcb->ids = NULL; } if (pcb->insn_state != 0) { kfree_data(pcb->insn_state, sizeof(x86_instruction_state_t)); pcb->insn_state = 0; } pcb->insn_state_copyin_failure_errorcode = 0; pcb->insn_copy_optout = false; } /* * machine_thread_process_signature * * Called to allow code signature dependent adjustments to the thread * state. Note that this is usually called twice for the main thread: * Once at thread creation by thread_create, when the signature is * potentially not attached yet (which is usually the case for the * first/main thread of a task), and once after the task's signature * has actually been attached. * */ kern_return_t machine_thread_process_signature(thread_t __unused thread, task_t __unused task) { return KERN_SUCCESS; } kern_return_t machine_thread_set_tsd_base( thread_t thread, mach_vm_offset_t tsd_base) { if (get_threadtask(thread) == kernel_task) { return KERN_INVALID_ARGUMENT; } if (thread_is_64bit_addr(thread)) { /* check for canonical address, set 0 otherwise */ if (!IS_USERADDR64_CANONICAL(tsd_base)) { tsd_base = 0ULL; } } else { if (tsd_base > UINT32_MAX) { tsd_base = 0ULL; } } pcb_t pcb = THREAD_TO_PCB(thread); pcb->cthread_self = tsd_base; if (!thread_is_64bit_addr(thread)) { /* Set up descriptor for later use */ struct real_descriptor desc = { .limit_low = 1, .limit_high = 0, .base_low = tsd_base & 0xffff, .base_med = (tsd_base >> 16) & 0xff, .base_high = (tsd_base >> 24) & 0xff, .access = ACC_P | ACC_PL_U | ACC_DATA_W, .granularity = SZ_32 | SZ_G, }; pcb->cthread_desc = desc; saved_state32(pcb->iss)->gs = USER_CTHREAD; } /* For current thread, make the TSD base active immediately */ if (thread == current_thread()) { if (thread_is_64bit_addr(thread)) { cpu_data_t *cdp; mp_disable_preemption(); cdp = current_cpu_datap(); if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base); } cdp->cpu_uber.cu_user_gs_base = tsd_base; mp_enable_preemption(); } else { /* assign descriptor */ mp_disable_preemption(); *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc; mp_enable_preemption(); } } return KERN_SUCCESS; } void machine_tecs(thread_t thr) { if (tecs_mode_supported) { thr->machine.mthr_do_segchk = 1; } } void machine_thread_set_insn_copy_optout(thread_t thr) { thr->machine.insn_copy_optout = true; } int machine_csv(cpuvn_e cve) { switch (cve) { case CPUVN_CI: return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0; default: break; } return 0; }