xref: /xnu-11215.61.5/osfmk/i386/pcb_native.c (revision 4f1223e81cd707a65cc109d0b8ad6653699da3c4)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 
57 #include <mach_ldebug.h>
58 
59 #include <sys/kdebug.h>
60 
61 #include <mach/kern_return.h>
62 #include <mach/thread_status.h>
63 #include <mach/vm_param.h>
64 
65 #include <kern/mach_param.h>
66 #include <kern/processor.h>
67 #include <kern/cpu_data.h>
68 #include <kern/cpu_number.h>
69 #include <kern/task.h>
70 #include <kern/thread.h>
71 #include <kern/sched_prim.h>
72 #include <kern/misc_protos.h>
73 #include <kern/assert.h>
74 #include <kern/spl.h>
75 #include <kern/machine.h>
76 #include <ipc/ipc_port.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_map_xnu.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_protos.h>
81 
82 #include <i386/commpage/commpage.h>
83 #include <i386/cpu_data.h>
84 #include <i386/cpu_number.h>
85 #include <i386/cpuid.h>
86 #include <i386/eflags.h>
87 #include <i386/proc_reg.h>
88 #include <i386/tss.h>
89 #include <i386/user_ldt.h>
90 #include <i386/fpu.h>
91 #include <i386/mp_desc.h>
92 #include <i386/misc_protos.h>
93 #include <i386/thread.h>
94 #include <i386/seg.h>
95 #include <i386/machine_routines.h>
96 #include <i386/lbr.h>
97 
98 #if HYPERVISOR
99 #include <kern/hv_support.h>
100 #endif
101 
/*
 * Compile-time size check.  Expands to an extern declaration of a char
 * array whose length is 1 when sizeof(_type_) is a multiple of 16 and -1
 * otherwise; a negative array length is rejected by the compiler, so a
 * mis-sized type fails the build.  Only a declaration is emitted here.
 */
#define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_)        \
extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
	        [(sizeof(_type_) % 16) == 0 ? 1 : -1]

/* Compile-time checks for vital save area sizing: */
ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);
109 
/*
 * NOTE(review): this macro is defined unconditionally, so the
 * `#ifdef DIRECTION_FLAG_DEBUG` test in act_machine_switch_pcb() is true
 * whatever (DEBUG | DEVELOPMENT) evaluates to.  Confirm whether `#if` was
 * intended there.
 */
#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)

extern zone_t           iss_zone;               /* zone for saved_state area */
extern zone_t           ids_zone;               /* zone for debug_state area */
extern int              tecs_mode_supported;
extern boolean_t        cpuid_tsx_supported;

/*
 * Set by i386_lbr_init() on Haswell/Broadwell when TSX is not supported;
 * consulted by i386_switch_lbrs() when writing the FROM/TO MSRs back.
 */
static bool lbr_need_tsx_workaround = false;

/* When non-zero, machine_thread_create() sets MTHR_SEGCHK on every new thread. */
int force_thread_policy_tecs;
120 
/*
 * MSR addresses backing one last-branch-record entry.  An msr_info of 0
 * means the entry has no INFO MSR (i386_lbr_synch() and i386_switch_lbrs()
 * test entry 0's msr_info to decide whether INFO MSRs are accessed at all).
 */
struct lbr_group {
	uint32_t        msr_from;
	uint32_t        msr_to;
	uint32_t        msr_info;
};

/* Per-CPU-family LBR layout: number of entries in use and their MSRs. */
struct cpu_lbrs {
	uint32_t                lbr_count;
	struct lbr_group        msr_lbrs[X86_MAX_LBRS];
};

/* LBR layout selected by i386_lbr_init() on the master CPU; NULL until then. */
static const struct cpu_lbrs *cpu_lbr_setp = NULL;
/* LBR record format, read from MSR_IA32_PERF_CAPABILITIES in i386_lbr_init(). */
static int cpu_lbr_type;
134 
/*
 * LBR MSR tables.  i386_lbr_init() selects nhm_cpu_lbrs for the Nehalem,
 * Westmere, Sandy Bridge, Ivy Bridge, Haswell and Broadwell families and
 * skl_cpu_lbrs for Skylake, Kaby Lake, Ice Lake and Comet Lake.
 *
 * nhm_cpu_lbrs: 16 entries, FROM/TO MSRs only (INFO MSR address is 0).
 */
static const struct cpu_lbrs nhm_cpu_lbrs = {
	16 /* LBR count */,
	{
		{ 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ },
		{ 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ },
		{ 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ },
		{ 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ },
		{ 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ },
		{ 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ },
		{ 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ },
		{ 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ },
		{ 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ },
		{ 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ },
		{ 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ },
		{ 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ },
		{ 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ },
		{ 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ },
		{ 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ },
		{ 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ }
	}
},
/* skl_cpu_lbrs: 32 entries, each with FROM, TO and INFO MSRs. */
    skl_cpu_lbrs = {
	32 /* LBR count */,
	{
		{ 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ },
		{ 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ },
		{ 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ },
		{ 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ },
		{ 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ },
		{ 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ },
		{ 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ },
		{ 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ },
		{ 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ },
		{ 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ },
		{ 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ },
		{ 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ },
		{ 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ },
		{ 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ },
		{ 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ },
		{ 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ },
		{ 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ },
		{ 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ },
		{ 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ },
		{ 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ },
		{ 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ },
		{ 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ },
		{ 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ },
		{ 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ },
		{ 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ },
		{ 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ },
		{ 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ },
		{ 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ },
		{ 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ },
		{ 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ },
		{ 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ },
		{ 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ }
	}
};
193 
194 void
i386_lbr_disable(void)195 i386_lbr_disable(void)
196 {
197 	/* Enable LBRs */
198 	wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) & ~DEBUGCTL_LBR_ENA);
199 }
200 
201 /*
202  * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic
203  * because the shadow map is not been initialized when these functions are called in
204  * early boot.
205  */
206 void __attribute__((no_sanitize("address")))
i386_lbr_enable(void)207 i386_lbr_enable(void)
208 {
209 	/* last_branch_kmode_only_enabled controls LBR data collection for core files and paniclogs */
210 	switch (last_branch_enabled_modes) {
211 	case LBR_ENABLED_USERMODE:
212 	case LBR_ENABLED_KERNELMODE:
213 		/* Enable LBRs */
214 		wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
215 		break;
216 	case LBR_ENABLED_NONE:
217 	case LBR_ENABLED_ALLMODES:
218 	default:
219 		break;
220 	}
221 }
222 
/*
 * Per-CPU LBR initialization.
 *
 * info_p:    CPUID information; cpuid_features and cpuid_cpufamily are read.
 * is_master: when true, the one-time global setup is also performed
 *            (LBR format detection, MSR table selection and, for
 *            kernel-mode collection, lbr_for_kmode_init()).
 *
 * With LBR_ENABLED_NONE the LBRs are disabled and nothing else is done.
 * LBR_ENABLED_ALLMODES panics.  Otherwise LBR_SELECT is programmed for the
 * configured mode and recording is enabled in DEBUGCTL.
 */
void __attribute__((no_sanitize("address")))
i386_lbr_init(i386_cpu_info_t *info_p, bool is_master)
{
	if (last_branch_enabled_modes == LBR_ENABLED_NONE) {
		i386_lbr_disable();
		return;
	}
	if (last_branch_enabled_modes == LBR_ENABLED_ALLMODES) {
		panic("Collecting LBR data from both user and kernel mode is not supported.");
	}

	if (is_master) {
		/*
		 * cpu_lbr_type is only read from the MSR when PDCM is advertised;
		 * otherwise it keeps its zero-initialized value and is judged by
		 * the range check below.
		 */
		if (info_p->cpuid_features & CPUID_FEATURE_PDCM) {
			/* All NHM+ CPUs should support this MSR */
			cpu_lbr_type = PERFCAP_LBR_TYPE(
				rdmsr64(MSR_IA32_PERF_CAPABILITIES));
		}
		/* Sanity-check the LBR type -- some VMMs do not properly support it */
		if (cpu_lbr_type < PERFCAP_LBR_TYPE_MISPRED || cpu_lbr_type > PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO) {
			kprintf("CPU-reported LBR type is invalid or is not supported (%d)."
			    "  Disabling LBR support.\n", cpu_lbr_type);
			/*
			 * Turning the global mode off here makes later calls to this
			 * function (e.g. for other CPUs) take the early-return path
			 * at the top.
			 */
			last_branch_enabled_modes = LBR_ENABLED_NONE;
			i386_lbr_disable();
			return;
		}

		switch (info_p->cpuid_cpufamily) {
		case CPUFAMILY_INTEL_NEHALEM:
		case CPUFAMILY_INTEL_WESTMERE:
			/* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */
			cpu_lbr_setp = &nhm_cpu_lbrs;
			break;

		case CPUFAMILY_INTEL_SANDYBRIDGE:
		case CPUFAMILY_INTEL_IVYBRIDGE:
			/* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */
			cpu_lbr_setp = &nhm_cpu_lbrs;
			break;

		case CPUFAMILY_INTEL_HASWELL:
		case CPUFAMILY_INTEL_BROADWELL:
			/* The FROM/TO write-back fixup in i386_switch_lbrs() is needed only when TSX is unavailable */
			lbr_need_tsx_workaround = cpuid_tsx_supported ? false : true;
			cpu_lbr_setp = &nhm_cpu_lbrs;
			break;

		case CPUFAMILY_INTEL_SKYLAKE:
		case CPUFAMILY_INTEL_KABYLAKE:
		case CPUFAMILY_INTEL_ICELAKE:
		case CPUFAMILY_INTEL_COMETLAKE:
			cpu_lbr_setp = &skl_cpu_lbrs;
			break;

		default:
			panic("Unknown CPU family");
		}
		if (last_branch_enabled_modes == LBR_ENABLED_KERNELMODE) {
			/* This depends on cpu_lbr_setp being setup first */
			lbr_for_kmode_init(cpu_lbr_setp->lbr_count);
		}
	}

	/* Configure LBR_SELECT for CPL > 0 records only or CPL = 0 for use in panic logs and core files */
	switch (last_branch_enabled_modes) {
	case LBR_ENABLED_USERMODE:
		wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_EQ_0);
		break;
	case LBR_ENABLED_KERNELMODE:
#if DEBUG || DEVELOPMENT
		/* Internal builds write 0 to LBR_SELECT (no privilege-level filter bits set) */
		wrmsr64(MSR_IA32_LBR_SELECT, 0);
#else
		wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_NEQ_0);
#endif
		break;
	case LBR_ENABLED_NONE:
	case LBR_ENABLED_ALLMODES:
	default:
		break;
	}

	/* Enable LBRs */
	wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
}
305 
306 static uint64_t
lbr_mode_based_filter(uint64_t record,__unused boolean_t from_userspace)307 lbr_mode_based_filter(uint64_t record, __unused boolean_t from_userspace)
308 {
309 	uint64_t filtered_record;
310 #define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ )
311 #define LBR_SENTINEL_USER_MODE (0x757365726C616E64ULL /* "userland" */ )
312 	switch (last_branch_enabled_modes) {
313 	case LBR_ENABLED_USERMODE:
314 		filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record;
315 		break;
316 	case LBR_ENABLED_KERNELMODE:
317 		/* For internal builds don't filter out userspace addresses from panic logs and core files. */
318 #if DEBUG || DEVELOPMENT
319 		filtered_record = record;
320 #else
321 		/* If coming from user space use the correct filter in release builds
322 		 * When LBRs are enabled for kernel mode and user space requests LBR data: remove kernel addresses
323 		 * "								   " and kernel mode requests LBR data: remove usermode addresses
324 		 */
325 		if (from_userspace) {
326 			filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record;
327 		} else {
328 			filtered_record = (VM_KERNEL_ADDRESS(record)) ? record : LBR_SENTINEL_USER_MODE;
329 		}
330 #endif
331 		break;
332 	case LBR_ENABLED_ALLMODES:
333 	case LBR_ENABLED_NONE:
334 	default:
335 		/* Set LBR to 0 for unsupported use cases */
336 		filtered_record = 0x0;
337 		break;
338 	}
339 	return filtered_record;
340 }
341 
342 static int
i386_lbr_native_state_to_mach_thread_state(pcb_t pcb,last_branch_state_t * machlbrp,boolean_t from_userspace)343 i386_lbr_native_state_to_mach_thread_state(pcb_t pcb, last_branch_state_t *machlbrp, boolean_t from_userspace)
344 {
345 	int last_entry;
346 	int i, j, lbr_tos;
347 	uint64_t from_rip, to_rip;
348 
349 	machlbrp->lbr_count = cpu_lbr_setp->lbr_count;
350 	lbr_tos = pcb->lbrs.lbr_tos & (X86_MAX_LBRS - 1);
351 	last_entry = (lbr_tos == (cpu_lbr_setp->lbr_count - 1)) ? 0 : (lbr_tos + 1);
352 
353 	switch (cpu_lbr_type) {
354 	case PERFCAP_LBR_TYPE_MISPRED:                  /* NHM */
355 
356 		machlbrp->lbr_supported_tsx = 0;
357 		machlbrp->lbr_supported_cycle_count = 0;
358 		for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
359 			to_rip = pcb->lbrs.lbrs[i].to_rip;
360 			machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
361 			from_rip = LBR_TYPE_MISPRED_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
362 			machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
363 			machlbrp->lbrs[j].mispredict = LBR_TYPE_MISPRED_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
364 			machlbrp->lbrs[j].tsx_abort = machlbrp->lbrs[j].in_tsx = 0;     /* Not Supported */
365 			if (i == last_entry) {
366 				break;
367 			}
368 		}
369 		break;
370 
371 	case PERFCAP_LBR_TYPE_TSXINFO:                  /* HSW/BDW */
372 
373 		machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
374 		machlbrp->lbr_supported_cycle_count = 0;
375 		for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
376 			to_rip = pcb->lbrs.lbrs[i].to_rip;
377 			machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
378 
379 			from_rip = LBR_TYPE_TSXINFO_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
380 			machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
381 			machlbrp->lbrs[j].mispredict = LBR_TYPE_TSXINFO_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
382 			if (cpuid_tsx_supported) {
383 				machlbrp->lbrs[j].tsx_abort = LBR_TYPE_TSXINFO_TSX_ABORT(pcb->lbrs.lbrs[i].from_rip);
384 				machlbrp->lbrs[j].in_tsx = LBR_TYPE_TSXINFO_IN_TSX(pcb->lbrs.lbrs[i].from_rip);
385 			} else {
386 				machlbrp->lbrs[j].tsx_abort = 0;
387 				machlbrp->lbrs[j].in_tsx = 0;
388 			}
389 			if (i == last_entry) {
390 				break;
391 			}
392 		}
393 		break;
394 
395 	case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO:         /* SKL+ */
396 
397 		machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
398 		machlbrp->lbr_supported_cycle_count = 1;
399 		for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
400 			from_rip = pcb->lbrs.lbrs[i].from_rip;
401 			machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
402 			to_rip = pcb->lbrs.lbrs[i].to_rip;
403 			machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
404 			machlbrp->lbrs[j].mispredict = LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb->lbrs.lbrs[i].info);
405 			machlbrp->lbrs[j].tsx_abort = LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb->lbrs.lbrs[i].info);
406 			machlbrp->lbrs[j].in_tsx = LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb->lbrs.lbrs[i].info);
407 			machlbrp->lbrs[j].cycle_count = LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb->lbrs.lbrs[i].info);
408 			if (i == last_entry) {
409 				break;
410 			}
411 		}
412 		break;
413 
414 	default:
415 #if DEBUG || DEVELOPMENT
416 		/* This should be impossible, based on the filtering we do in i386_lbr_init() */
417 		panic("Unknown LBR format: %d!", cpu_lbr_type);
418 		/*NOTREACHED*/
419 #else
420 		return -1;
421 #endif
422 	}
423 
424 	return 0;
425 }
426 
427 int
i386_filtered_lbr_state_to_mach_thread_state(thread_t thr_act,last_branch_state_t * machlbrp,boolean_t from_userspace)428 i386_filtered_lbr_state_to_mach_thread_state(thread_t thr_act, last_branch_state_t *machlbrp, boolean_t from_userspace)
429 {
430 	boolean_t istate;
431 
432 	istate = ml_set_interrupts_enabled(FALSE);
433 	/* If the current thread is asking for its own LBR data, synch the LBRs first */
434 	if (thr_act == current_thread()) {
435 		i386_lbr_synch(thr_act);
436 	}
437 	ml_set_interrupts_enabled(istate);
438 
439 	return i386_lbr_native_state_to_mach_thread_state(THREAD_TO_PCB(thr_act), machlbrp, from_userspace);
440 }
441 
442 void
i386_lbr_synch(thread_t thr)443 i386_lbr_synch(thread_t thr)
444 {
445 	pcb_t old_pcb = THREAD_TO_PCB(thr);
446 	int i;
447 
448 	/* First, save current LBRs to the old thread's PCB */
449 	if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
450 		for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
451 			old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
452 			old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
453 			old_pcb->lbrs.lbrs[i].info = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info);
454 		}
455 	} else {
456 		for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
457 			old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
458 			old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
459 		}
460 	}
461 
462 	/* Finally, save the TOS */
463 	old_pcb->lbrs.lbr_tos = rdmsr64(MSR_IA32_LASTBRANCH_TOS);
464 }
465 
466 static void
i386_switch_lbrs(thread_t old,thread_t new)467 i386_switch_lbrs(thread_t old, thread_t new)
468 {
469 	pcb_t   new_pcb;
470 	int     i;
471 	bool    save_old = (old != NULL && get_threadtask(old) != kernel_task);
472 	bool    restore_new = (get_threadtask(new) != kernel_task);
473 
474 	if (!save_old && !restore_new) {
475 		return;
476 	}
477 
478 	assert(cpu_lbr_setp != NULL);
479 
480 	new_pcb = THREAD_TO_PCB(new);
481 
482 	i386_lbr_disable();
483 
484 	if (save_old) {
485 		i386_lbr_synch(old);
486 	}
487 
488 	if (restore_new) {
489 		/* Now restore the new threads's LBRs */
490 		if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
491 			for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
492 				wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
493 				wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
494 				wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info, new_pcb->lbrs.lbrs[i].info);
495 			}
496 		} else {
497 			if (lbr_need_tsx_workaround) {
498 				for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
499 					/*
500 					 * If TSX has been disabled, the hardware expects those two bits to be sign
501 					 * extensions of bit 47 (even though it didn't return them that way via the rdmsr!)
502 					 */
503 #define BIT_47 (1ULL << 47)
504 					wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from,
505 					    new_pcb->lbrs.lbrs[i].from_rip |
506 					    ((new_pcb->lbrs.lbrs[i].from_rip & BIT_47) ? 0x6000000000000000ULL : 0));
507 					wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to,
508 					    new_pcb->lbrs.lbrs[i].to_rip |
509 					    ((new_pcb->lbrs.lbrs[i].to_rip & BIT_47) ? 0x6000000000000000ULL : 0));
510 				}
511 			} else {
512 				for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
513 					wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
514 					wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
515 				}
516 			}
517 		}
518 
519 		/* Lastly, restore the new threads's TOS */
520 		wrmsr64(MSR_IA32_LASTBRANCH_TOS, new_pcb->lbrs.lbr_tos);
521 	}
522 
523 	i386_lbr_enable();
524 }
525 
/*
 * Install the machine-dependent per-CPU state for the thread being
 * switched to.
 *
 * old: outgoing thread; used here only for the LBR save in
 *      i386_switch_lbrs().
 * new: incoming thread; must have a kernel stack.
 *
 * Must be called with interrupts disabled (asserted below).
 */
void
act_machine_switch_pcb(thread_t old, thread_t new)
{
	pcb_t                   pcb = THREAD_TO_PCB(new);
	cpu_data_t              *cdp = current_cpu_datap();
	struct real_descriptor  *ldtp;
	mach_vm_offset_t        pcb_stack_top;

	assert(new->kernel_stack != 0);
	assert(ml_get_interrupts_enabled() == FALSE);
	/*
	 * NOTE(review): DIRECTION_FLAG_DEBUG is #defined unconditionally in this
	 * file, so this #ifdef is always true and the check is compiled into
	 * every build -- confirm whether `#if` was intended.
	 */
#ifdef  DIRECTION_FLAG_DEBUG
	if (x86_get_flags() & EFL_DF) {
		panic("Direction flag detected: 0x%lx", x86_get_flags());
	}
#endif

	/*
	 * Clear segment state
	 * unconditionally for DS/ES/FS but more carefully for GS whose
	 * cached state we track.
	 */
	set_ds(NULL_SEG);
	set_es(NULL_SEG);
	set_fs(NULL_SEG);

	if (get_gs() != NULL_SEG) {
		swapgs();               /* switch to user's GS context */
		set_gs(NULL_SEG);
		swapgs();               /* and back to kernel */

		/* record the active machine state lost */
		cdp->cpu_uber.cu_user_gs_base = 0;
	}

	vm_offset_t                     isf;

	/*
	 * Set pointer to PCB's interrupt stack frame in cpu data.
	 * Used by syscall and double-fault trap handlers.
	 */
	isf = (vm_offset_t) &pcb->iss->ss_64.isf;
	cdp->cpu_uber.cu_isf = isf;
	/* The "stack top" is the address just past the end of the save area */
	pcb_stack_top = (vm_offset_t) (pcb->iss + 1);
	/* require 16-byte alignment */
	assert((pcb_stack_top & 0xF) == 0);

	current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku;
	/*
	 * Top of temporary sysenter stack points to pcb stack.
	 * Although this is not normally used by 64-bit users,
	 * it needs to be set in case a sysenter is attempted.
	 */
	*current_sstk64() = pcb_stack_top;

	/* Both the CPU data and its shadow copy get the same cd_estack value */
	cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku;

	if (is_saved_state64(pcb->iss)) {
		cdp->cpu_task_map = new->map->pmap->pm_task_map;

		/*
		 * Enable the 64-bit user code segment, USER64_CS.
		 * Disable the 32-bit user code segment, USER_CS.
		 */
		gdt_desc_p(USER64_CS)->access |= ACC_PL_U;
		gdt_desc_p(USER_CS)->access &= ~ACC_PL_U;

		/*
		 * Switch user's GS base if necessary
		 * by setting the Kernel's GS base MSR
		 * - this will become the user's on the swapgs when
		 * returning to user-space.  Avoid this for
		 * kernel threads (no user TLS support required)
		 * and verify the memory shadow of the segment base
		 * in the event it was altered in user space.
		 */
		if ((pcb->cthread_self != 0) || (get_threadtask(new) != kernel_task)) {
			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
			    (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
				cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
				wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
			}
		}
	} else {
		cdp->cpu_task_map = TASK_MAP_32BIT;

		/*
		 * Disable USER64_CS
		 * Enable USER_CS
		 */

		/* It's possible that writing to the GDT areas
		 * is expensive, if the processor intercepts those
		 * writes to invalidate its internal segment caches
		 * TODO: perhaps only do this if switching bitness
		 */
		gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
		gdt_desc_p(USER_CS)->access |= ACC_PL_U;

		/*
		 * Set the thread's cthread (a.k.a. pthread).
		 * For 32-bit user this involves setting the USER_CTHREAD
		 * descriptor in the LDT to point to the cthread data.
		 * This involves copying in the pre-initialized descriptor.
		 */
		ldtp = current_ldt();
		ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
		if (pcb->uldt_selector != 0) {
			ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
		}
		cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
	}

	/* Publish the incoming thread's segment-check flags in the per-CPU data */
	cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk;

	/* Per-thread LBR state is swapped only when collecting user-mode branches */
	if (last_branch_enabled_modes == LBR_ENABLED_USERMODE) {
		i386_switch_lbrs(old, new);
	}

	/*
	 * Set the thread's LDT or LDT entry.
	 */
	task_t task = get_threadtask_early(new);
	if (__probable(task == TASK_NULL || task->i386_ldt == 0)) {
		/*
		 * Use system LDT.
		 */
		ml_cpu_set_ldt(KERNEL_LDT);
		cdp->cpu_curtask_has_ldt = 0;
	} else {
		/*
		 * Task has its own LDT.
		 */
		user_ldt_set(new);
		cdp->cpu_curtask_has_ldt = 1;
	}
}
662 
663 kern_return_t
thread_set_wq_state32(thread_t thread,thread_state_t tstate)664 thread_set_wq_state32(thread_t thread, thread_state_t tstate)
665 {
666 	x86_thread_state32_t    *state;
667 	x86_saved_state32_t     *saved_state;
668 	thread_t curth = current_thread();
669 	spl_t                   s = 0;
670 
671 	pal_register_cache_state(thread, DIRTY);
672 
673 	saved_state = USER_REGS32(thread);
674 
675 	state = (x86_thread_state32_t *)tstate;
676 
677 	if (curth != thread) {
678 		s = splsched();
679 		thread_lock(thread);
680 	}
681 
682 	saved_state->ebp = 0;
683 	saved_state->eip = state->eip;
684 	saved_state->eax = state->eax;
685 	saved_state->ebx = state->ebx;
686 	saved_state->ecx = state->ecx;
687 	saved_state->edx = state->edx;
688 	saved_state->edi = state->edi;
689 	saved_state->esi = state->esi;
690 	saved_state->uesp = state->esp;
691 	saved_state->efl = EFL_USER_SET;
692 
693 	saved_state->cs = USER_CS;
694 	saved_state->ss = USER_DS;
695 	saved_state->ds = USER_DS;
696 	saved_state->es = USER_DS;
697 
698 	if (curth != thread) {
699 		thread_unlock(thread);
700 		splx(s);
701 	}
702 
703 	return KERN_SUCCESS;
704 }
705 
706 
707 kern_return_t
thread_set_wq_state64(thread_t thread,thread_state_t tstate)708 thread_set_wq_state64(thread_t thread, thread_state_t tstate)
709 {
710 	x86_thread_state64_t    *state;
711 	x86_saved_state64_t     *saved_state;
712 	thread_t curth = current_thread();
713 	spl_t                   s = 0;
714 
715 	saved_state = USER_REGS64(thread);
716 	state = (x86_thread_state64_t *)tstate;
717 
718 	/* Disallow setting non-canonical PC or stack */
719 	if (!IS_USERADDR64_CANONICAL(state->rsp) ||
720 	    !IS_USERADDR64_CANONICAL(state->rip)) {
721 		return KERN_FAILURE;
722 	}
723 
724 	pal_register_cache_state(thread, DIRTY);
725 
726 	if (curth != thread) {
727 		s = splsched();
728 		thread_lock(thread);
729 	}
730 
731 	saved_state->rbp = 0;
732 	saved_state->rdi = state->rdi;
733 	saved_state->rsi = state->rsi;
734 	saved_state->rdx = state->rdx;
735 	saved_state->rcx = state->rcx;
736 	saved_state->r8  = state->r8;
737 	saved_state->r9  = state->r9;
738 
739 	saved_state->isf.rip = state->rip;
740 	saved_state->isf.rsp = state->rsp;
741 	saved_state->isf.cs = USER64_CS;
742 	saved_state->isf.rflags = EFL_USER_SET;
743 
744 	if (curth != thread) {
745 		thread_unlock(thread);
746 		splx(s);
747 	}
748 
749 	return KERN_SUCCESS;
750 }
751 
752 /*
753  * Initialize the machine-dependent state for a new thread.
754  */
755 void
machine_thread_create(thread_t thread,task_t task,bool first_thread __unused)756 machine_thread_create(
757 	thread_t                thread,
758 	task_t                  task,
759 	bool                    first_thread __unused)
760 {
761 	pcb_t                   pcb = THREAD_TO_PCB(thread);
762 
763 	if ((task->t_flags & TF_TECS) || __improbable(force_thread_policy_tecs)) {
764 		thread->machine.mthr_do_segchk = MTHR_SEGCHK;
765 	} else {
766 		thread->machine.mthr_do_segchk = 0;
767 	}
768 
769 	if (task != kernel_task &&
770 	    __improbable((cpuid_wa_required(CPU_INTEL_RSBST) & CWA_ON) != 0)) {
771 		thread->machine.mthr_do_segchk |= MTHR_RSBST;
772 	}
773 
774 	/*
775 	 * Allocate save frame only if required.
776 	 */
777 	if (pcb->iss == NULL) {
778 		assert((get_preemption_level() == 0));
779 		pcb->iss = zalloc_flags(iss_zone, Z_WAITOK | Z_NOFAIL);
780 	}
781 
782 	/*
783 	 * Ensure that the synthesized 32-bit state including
784 	 * the 64-bit interrupt state can be acommodated in the
785 	 * 64-bit state we allocate for both 32-bit and 64-bit threads.
786 	 */
787 	assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <=
788 	    sizeof(pcb->iss->ss_64));
789 
790 	bzero((char *)pcb->iss, sizeof(x86_saved_state_t));
791 
792 	bzero(&pcb->lbrs, sizeof(x86_lbrs_t));
793 
794 	if (task_has_64Bit_addr(task)) {
795 		pcb->iss->flavor = x86_SAVED_STATE64;
796 
797 		pcb->iss->ss_64.isf.cs = USER64_CS;
798 		pcb->iss->ss_64.isf.ss = USER_DS;
799 		pcb->iss->ss_64.fs = USER_DS;
800 		pcb->iss->ss_64.gs = USER_DS;
801 		pcb->iss->ss_64.isf.rflags = EFL_USER_SET;
802 	} else {
803 		pcb->iss->flavor = x86_SAVED_STATE32;
804 
805 		pcb->iss->ss_32.cs = USER_CS;
806 		pcb->iss->ss_32.ss = USER_DS;
807 		pcb->iss->ss_32.ds = USER_DS;
808 		pcb->iss->ss_32.es = USER_DS;
809 		pcb->iss->ss_32.fs = USER_DS;
810 		pcb->iss->ss_32.gs = USER_DS;
811 		pcb->iss->ss_32.efl = EFL_USER_SET;
812 	}
813 
814 	simple_lock_init(&pcb->lock, 0);
815 
816 	pcb->cthread_self = 0;
817 	pcb->uldt_selector = 0;
818 	pcb->thread_gpu_ns = 0;
819 	/* Ensure that the "cthread" descriptor describes a valid
820 	 * segment.
821 	 */
822 	if ((pcb->cthread_desc.access & ACC_P) == 0) {
823 		pcb->cthread_desc = *gdt_desc_p(USER_DS);
824 	}
825 
826 
827 	pcb->insn_state_copyin_failure_errorcode = 0;
828 	if (pcb->insn_state != 0) {     /* Reinit for new thread */
829 		bzero(pcb->insn_state, sizeof(x86_instruction_state_t));
830 		pcb->insn_state->insn_stream_valid_bytes = -1;
831 	}
832 
833 	pcb->insn_copy_optout = (task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
834 }
835 
836 /*
837  * Machine-dependent cleanup prior to destroying a thread
838  */
839 void
machine_thread_destroy(thread_t thread)840 machine_thread_destroy(
841 	thread_t                thread)
842 {
843 	pcb_t   pcb = THREAD_TO_PCB(thread);
844 
845 #if HYPERVISOR
846 	if (thread->hv_thread_target) {
847 		hv_callbacks.thread_destroy(thread->hv_thread_target);
848 		thread->hv_thread_target = NULL;
849 	}
850 #endif
851 
852 	if (pcb->ifps != 0) {
853 		fpu_free(thread, pcb->ifps);
854 	}
855 	if (pcb->iss != 0) {
856 		zfree(iss_zone, pcb->iss);
857 		pcb->iss = 0;
858 	}
859 	if (pcb->ids) {
860 		zfree(ids_zone, pcb->ids);
861 		pcb->ids = NULL;
862 	}
863 
864 	if (pcb->insn_state != 0) {
865 		kfree_data(pcb->insn_state, sizeof(x86_instruction_state_t));
866 		pcb->insn_state = 0;
867 	}
868 	pcb->insn_state_copyin_failure_errorcode = 0;
869 	pcb->insn_copy_optout = false;
870 }
871 
872 /*
873  * machine_thread_process_signature
874  *
875  * Called to allow code signature dependent adjustments to the thread
876  * state. Note that this is usually called twice for the main thread:
877  * Once at thread creation by thread_create, when the signature is
878  * potentially not attached yet (which is usually the case for the
879  * first/main thread of a task), and once after the task's signature
880  * has actually been attached.
881  *
882  */
883 kern_return_t
machine_thread_process_signature(thread_t __unused thread,task_t __unused task)884 machine_thread_process_signature(thread_t __unused thread, task_t __unused task)
885 {
886 	return KERN_SUCCESS;
887 }
888 
889 kern_return_t
machine_thread_set_tsd_base(thread_t thread,mach_vm_offset_t tsd_base)890 machine_thread_set_tsd_base(
891 	thread_t                        thread,
892 	mach_vm_offset_t        tsd_base)
893 {
894 	if (get_threadtask(thread) == kernel_task) {
895 		return KERN_INVALID_ARGUMENT;
896 	}
897 
898 	if (thread_is_64bit_addr(thread)) {
899 		/* check for canonical address, set 0 otherwise  */
900 		if (!IS_USERADDR64_CANONICAL(tsd_base)) {
901 			tsd_base = 0ULL;
902 		}
903 	} else {
904 		if (tsd_base > UINT32_MAX) {
905 			tsd_base = 0ULL;
906 		}
907 	}
908 
909 	pcb_t pcb = THREAD_TO_PCB(thread);
910 	pcb->cthread_self = tsd_base;
911 
912 	if (!thread_is_64bit_addr(thread)) {
913 		/* Set up descriptor for later use */
914 		struct real_descriptor desc = {
915 			.limit_low = 1,
916 			.limit_high = 0,
917 			.base_low = tsd_base & 0xffff,
918 			.base_med = (tsd_base >> 16) & 0xff,
919 			.base_high = (tsd_base >> 24) & 0xff,
920 			.access = ACC_P | ACC_PL_U | ACC_DATA_W,
921 			.granularity = SZ_32 | SZ_G,
922 		};
923 
924 		pcb->cthread_desc = desc;
925 		saved_state32(pcb->iss)->gs = USER_CTHREAD;
926 	}
927 
928 	/* For current thread, make the TSD base active immediately */
929 	if (thread == current_thread()) {
930 		if (thread_is_64bit_addr(thread)) {
931 			cpu_data_t              *cdp;
932 
933 			mp_disable_preemption();
934 			cdp = current_cpu_datap();
935 			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
936 			    (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
937 				wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base);
938 			}
939 			cdp->cpu_uber.cu_user_gs_base = tsd_base;
940 			mp_enable_preemption();
941 		} else {
942 			/* assign descriptor */
943 			mp_disable_preemption();
944 			*ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc;
945 			mp_enable_preemption();
946 		}
947 	}
948 
949 	return KERN_SUCCESS;
950 }
951 
952 void
machine_tecs(thread_t thr)953 machine_tecs(thread_t thr)
954 {
955 	if (tecs_mode_supported) {
956 		thr->machine.mthr_do_segchk = 1;
957 	}
958 }
959 
960 void
machine_thread_set_insn_copy_optout(thread_t thr)961 machine_thread_set_insn_copy_optout(thread_t thr)
962 {
963 	thr->machine.insn_copy_optout = true;
964 }
965 
966 int
machine_csv(cpuvn_e cve)967 machine_csv(cpuvn_e cve)
968 {
969 	switch (cve) {
970 	case CPUVN_CI:
971 		return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0;
972 
973 	default:
974 		break;
975 	}
976 
977 	return 0;
978 }
979