1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #include <mach_ldebug.h>
58
59 #include <sys/kdebug.h>
60
61 #include <mach/kern_return.h>
62 #include <mach/thread_status.h>
63 #include <mach/vm_param.h>
64
65 #include <kern/mach_param.h>
66 #include <kern/processor.h>
67 #include <kern/cpu_data.h>
68 #include <kern/cpu_number.h>
69 #include <kern/task.h>
70 #include <kern/thread.h>
71 #include <kern/sched_prim.h>
72 #include <kern/misc_protos.h>
73 #include <kern/assert.h>
74 #include <kern/spl.h>
75 #include <kern/machine.h>
76 #include <ipc/ipc_port.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_map.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_protos.h>
81
82 #include <i386/commpage/commpage.h>
83 #include <i386/cpu_data.h>
84 #include <i386/cpu_number.h>
85 #include <i386/cpuid.h>
86 #include <i386/eflags.h>
87 #include <i386/proc_reg.h>
88 #include <i386/tss.h>
89 #include <i386/user_ldt.h>
90 #include <i386/fpu.h>
91 #include <i386/mp_desc.h>
92 #include <i386/misc_protos.h>
93 #include <i386/thread.h>
94 #include <i386/seg.h>
95 #include <i386/machine_routines.h>
96 #include <i386/lbr.h>
97
98 #if HYPERVISOR
99 #include <kern/hv_support.h>
100 #endif
101
102 #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \
103 extern char assert_is_16byte_multiple_sizeof_ ## _type_ \
104 [(sizeof(_type_) % 16) == 0 ? 1 : -1]
105
106 /* Compile-time checks for vital save area sizing: */
107 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
108 ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);
109
110 #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
111
112 extern zone_t iss_zone; /* zone for saved_state area */
113 extern zone_t ids_zone; /* zone for debug_state area */
114 extern int tecs_mode_supported;
115 extern boolean_t cpuid_tsx_supported;
116
117 bool lbr_need_tsx_workaround = false;
118
119 int force_thread_policy_tecs;
120
121 struct lbr_group {
122 uint32_t msr_from;
123 uint32_t msr_to;
124 uint32_t msr_info;
125 };
126
127 struct cpu_lbrs {
128 uint32_t lbr_count;
129 struct lbr_group msr_lbrs[X86_MAX_LBRS];
130 };
131
132 const struct cpu_lbrs *cpu_lbr_setp = NULL;
133 int cpu_lbr_type;
134
135 const struct cpu_lbrs nhm_cpu_lbrs = {
136 16 /* LBR count */,
137 {
138 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0 /* INFO_0 */ },
139 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0 /* INFO_1 */ },
140 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0 /* INFO_2 */ },
141 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0 /* INFO_3 */ },
142 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0 /* INFO_4 */ },
143 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0 /* INFO_5 */ },
144 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0 /* INFO_6 */ },
145 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0 /* INFO_7 */ },
146 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0 /* INFO_8 */ },
147 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0 /* INFO_9 */ },
148 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0 /* INFO_10 */ },
149 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0 /* INFO_11 */ },
150 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0 /* INFO_12 */ },
151 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0 /* INFO_13 */ },
152 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0 /* INFO_14 */ },
153 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0 /* INFO_15 */ }
154 }
155 },
156 skl_cpu_lbrs = {
157 32 /* LBR count */,
158 {
159 { 0x680 /* FROM_0 */, 0x6c0 /* TO_0 */, 0xdc0 /* INFO_0 */ },
160 { 0x681 /* FROM_1 */, 0x6c1 /* TO_1 */, 0xdc1 /* INFO_1 */ },
161 { 0x682 /* FROM_2 */, 0x6c2 /* TO_2 */, 0xdc2 /* INFO_2 */ },
162 { 0x683 /* FROM_3 */, 0x6c3 /* TO_3 */, 0xdc3 /* INFO_3 */ },
163 { 0x684 /* FROM_4 */, 0x6c4 /* TO_4 */, 0xdc4 /* INFO_4 */ },
164 { 0x685 /* FROM_5 */, 0x6c5 /* TO_5 */, 0xdc5 /* INFO_5 */ },
165 { 0x686 /* FROM_6 */, 0x6c6 /* TO_6 */, 0xdc6 /* INFO_6 */ },
166 { 0x687 /* FROM_7 */, 0x6c7 /* TO_7 */, 0xdc7 /* INFO_7 */ },
167 { 0x688 /* FROM_8 */, 0x6c8 /* TO_8 */, 0xdc8 /* INFO_8 */ },
168 { 0x689 /* FROM_9 */, 0x6c9 /* TO_9 */, 0xdc9 /* INFO_9 */ },
169 { 0x68A /* FROM_10 */, 0x6ca /* TO_10 */, 0xdca /* INFO_10 */ },
170 { 0x68B /* FROM_11 */, 0x6cb /* TO_11 */, 0xdcb /* INFO_11 */ },
171 { 0x68C /* FROM_12 */, 0x6cc /* TO_12 */, 0xdcc /* INFO_12 */ },
172 { 0x68D /* FROM_13 */, 0x6cd /* TO_13 */, 0xdcd /* INFO_13 */ },
173 { 0x68E /* FROM_14 */, 0x6ce /* TO_14 */, 0xdce /* INFO_14 */ },
174 { 0x68F /* FROM_15 */, 0x6cf /* TO_15 */, 0xdcf /* INFO_15 */ },
175 { 0x690 /* FROM_16 */, 0x6d0 /* TO_16 */, 0xdd0 /* INFO_16 */ },
176 { 0x691 /* FROM_17 */, 0x6d1 /* TO_17 */, 0xdd1 /* INFO_17 */ },
177 { 0x692 /* FROM_18 */, 0x6d2 /* TO_18 */, 0xdd2 /* INFO_18 */ },
178 { 0x693 /* FROM_19 */, 0x6d3 /* TO_19 */, 0xdd3 /* INFO_19 */ },
179 { 0x694 /* FROM_20 */, 0x6d4 /* TO_20 */, 0xdd4 /* INFO_20 */ },
180 { 0x695 /* FROM_21 */, 0x6d5 /* TO_21 */, 0xdd5 /* INFO_21 */ },
181 { 0x696 /* FROM_22 */, 0x6d6 /* TO_22 */, 0xdd6 /* INFO_22 */ },
182 { 0x697 /* FROM_23 */, 0x6d7 /* TO_23 */, 0xdd7 /* INFO_23 */ },
183 { 0x698 /* FROM_24 */, 0x6d8 /* TO_24 */, 0xdd8 /* INFO_24 */ },
184 { 0x699 /* FROM_25 */, 0x6d9 /* TO_25 */, 0xdd9 /* INFO_25 */ },
185 { 0x69a /* FROM_26 */, 0x6da /* TO_26 */, 0xdda /* INFO_26 */ },
186 { 0x69b /* FROM_27 */, 0x6db /* TO_27 */, 0xddb /* INFO_27 */ },
187 { 0x69c /* FROM_28 */, 0x6dc /* TO_28 */, 0xddc /* INFO_28 */ },
188 { 0x69d /* FROM_29 */, 0x6dd /* TO_29 */, 0xddd /* INFO_29 */ },
189 { 0x69e /* FROM_30 */, 0x6de /* TO_30 */, 0xdde /* INFO_30 */ },
190 { 0x69f /* FROM_31 */, 0x6df /* TO_31 */, 0xddf /* INFO_31 */ }
191 }
192 };
193
194 void
i386_lbr_disable(void)195 i386_lbr_disable(void)
196 {
197 /* Enable LBRs */
198 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) & ~DEBUGCTL_LBR_ENA);
199 }
200
201 /*
202 * Disable ASAN for i386_lbr_enable and i386_lbr_init, otherwise we get a KASAN panic
203 * because the shadow map is not been initialized when these functions are called in
204 * early boot.
205 */
206 void __attribute__((no_sanitize("address")))
i386_lbr_enable(void)207 i386_lbr_enable(void)
208 {
209 /* last_branch_kmode_only_enabled controls LBR data collection for core files and paniclogs */
210 switch (last_branch_enabled_modes) {
211 case LBR_ENABLED_USERMODE:
212 case LBR_ENABLED_KERNELMODE:
213 /* Enable LBRs */
214 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
215 break;
216 case LBR_ENABLED_NONE:
217 case LBR_ENABLED_ALLMODES:
218 default:
219 break;
220 }
221 }
222
223 void __attribute__((no_sanitize("address")))
i386_lbr_init(i386_cpu_info_t * info_p,bool is_master)224 i386_lbr_init(i386_cpu_info_t *info_p, bool is_master)
225 {
226 if (last_branch_enabled_modes == LBR_ENABLED_NONE) {
227 i386_lbr_disable();
228 return;
229 }
230 if (last_branch_enabled_modes == LBR_ENABLED_ALLMODES) {
231 panic("Collecting LBR data from both user and kernel mode is not supported.");
232 }
233
234 if (is_master) {
235 /* All NHM+ CPUs support PERF_CAPABILITIES, so no need to check cpuid for its presence */
236 cpu_lbr_type = PERFCAP_LBR_TYPE(rdmsr64(MSR_IA32_PERF_CAPABILITIES));
237
238 /* Sanity-check the LBR type -- some VMMs do not properly support it */
239 if (cpu_lbr_type < PERFCAP_LBR_TYPE_MISPRED || cpu_lbr_type > PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO) {
240 kprintf("CPU-reported LBR type is invalid or is not supported (%d)."
241 " Disabling LBR support.\n", cpu_lbr_type);
242 last_branch_enabled_modes = LBR_ENABLED_NONE;
243 i386_lbr_disable();
244 return;
245 }
246
247 switch (info_p->cpuid_cpufamily) {
248 case CPUFAMILY_INTEL_NEHALEM:
249 case CPUFAMILY_INTEL_WESTMERE:
250 /* NHM family shares an LBR_SELECT MSR for both logical CPUs per core */
251 cpu_lbr_setp = &nhm_cpu_lbrs;
252 break;
253
254 case CPUFAMILY_INTEL_SANDYBRIDGE:
255 case CPUFAMILY_INTEL_IVYBRIDGE:
256 /* SNB+ has dedicated LBR_SELECT MSRs for each logical CPU per core */
257 cpu_lbr_setp = &nhm_cpu_lbrs;
258 break;
259
260 case CPUFAMILY_INTEL_HASWELL:
261 case CPUFAMILY_INTEL_BROADWELL:
262 lbr_need_tsx_workaround = cpuid_tsx_supported ? false : true;
263 cpu_lbr_setp = &nhm_cpu_lbrs;
264 break;
265
266 case CPUFAMILY_INTEL_SKYLAKE:
267 case CPUFAMILY_INTEL_KABYLAKE:
268 case CPUFAMILY_INTEL_ICELAKE:
269 case CPUFAMILY_INTEL_COMETLAKE:
270 cpu_lbr_setp = &skl_cpu_lbrs;
271 break;
272
273 default:
274 panic("Unknown CPU family");
275 }
276 if (last_branch_enabled_modes == LBR_ENABLED_KERNELMODE) {
277 /* This depends on cpu_lbr_setp being setup first */
278 lbr_for_kmode_init(cpu_lbr_setp->lbr_count);
279 }
280 }
281
282 /* Configure LBR_SELECT for CPL > 0 records only or CPL = 0 for use in panic logs and core files */
283 switch (last_branch_enabled_modes) {
284 case LBR_ENABLED_USERMODE:
285 wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_EQ_0);
286 break;
287 case LBR_ENABLED_KERNELMODE:
288 #if DEBUG || DEVELOPMENT
289 wrmsr64(MSR_IA32_LBR_SELECT, 0);
290 #else
291 wrmsr64(MSR_IA32_LBR_SELECT, LBR_SELECT_CPL_NEQ_0);
292 #endif
293 break;
294 case LBR_ENABLED_NONE:
295 case LBR_ENABLED_ALLMODES:
296 default:
297 break;
298 }
299
300 /* Enable LBRs */
301 wrmsr64(MSR_IA32_DEBUGCTLMSR, rdmsr64(MSR_IA32_DEBUGCTLMSR) | DEBUGCTL_LBR_ENA);
302 }
303
304 static uint64_t
lbr_mode_based_filter(uint64_t record,__unused boolean_t from_userspace)305 lbr_mode_based_filter(uint64_t record, __unused boolean_t from_userspace)
306 {
307 uint64_t filtered_record;
308 #define LBR_SENTINEL_KERNEL_MODE (0x66726d6b65726e6cULL /* "frmkernl" */ )
309 #define LBR_SENTINEL_USER_MODE (0x757365726C616E64ULL /* "userland" */ )
310 switch (last_branch_enabled_modes) {
311 case LBR_ENABLED_USERMODE:
312 filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record;
313 break;
314 case LBR_ENABLED_KERNELMODE:
315 /* For internal builds don't filter out userspace addresses from panic logs and core files. */
316 #if DEBUG || DEVELOPMENT
317 filtered_record = record;
318 #else
319 /* If coming from user space use the correct filter in release builds
320 * When LBRs are enabled for kernel mode and user space requests LBR data: remove kernel addresses
321 * " " and kernel mode requests LBR data: remove usermode addresses
322 */
323 if (from_userspace) {
324 filtered_record = (record > VM_MAX_USER_PAGE_ADDRESS) ? LBR_SENTINEL_KERNEL_MODE : record;
325 } else {
326 filtered_record = (VM_KERNEL_ADDRESS(record)) ? record : LBR_SENTINEL_USER_MODE;
327 }
328 #endif
329 break;
330 case LBR_ENABLED_ALLMODES:
331 case LBR_ENABLED_NONE:
332 default:
333 /* Set LBR to 0 for unsupported use cases */
334 filtered_record = 0x0;
335 break;
336 }
337 return filtered_record;
338 }
339
340 static int
i386_lbr_native_state_to_mach_thread_state(pcb_t pcb,last_branch_state_t * machlbrp,boolean_t from_userspace)341 i386_lbr_native_state_to_mach_thread_state(pcb_t pcb, last_branch_state_t *machlbrp, boolean_t from_userspace)
342 {
343 int last_entry;
344 int i, j, lbr_tos;
345 uint64_t from_rip, to_rip;
346
347 machlbrp->lbr_count = cpu_lbr_setp->lbr_count;
348 lbr_tos = pcb->lbrs.lbr_tos & (X86_MAX_LBRS - 1);
349 last_entry = (lbr_tos == (cpu_lbr_setp->lbr_count - 1)) ? 0 : (lbr_tos + 1);
350
351 switch (cpu_lbr_type) {
352 case PERFCAP_LBR_TYPE_MISPRED: /* NHM */
353
354 machlbrp->lbr_supported_tsx = 0;
355 machlbrp->lbr_supported_cycle_count = 0;
356 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
357 to_rip = pcb->lbrs.lbrs[i].to_rip;
358 machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
359 from_rip = LBR_TYPE_MISPRED_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
360 machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
361 machlbrp->lbrs[j].mispredict = LBR_TYPE_MISPRED_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
362 machlbrp->lbrs[j].tsx_abort = machlbrp->lbrs[j].in_tsx = 0; /* Not Supported */
363 if (i == last_entry) {
364 break;
365 }
366 }
367 break;
368
369 case PERFCAP_LBR_TYPE_TSXINFO: /* HSW/BDW */
370
371 machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
372 machlbrp->lbr_supported_cycle_count = 0;
373 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
374 to_rip = pcb->lbrs.lbrs[i].to_rip;
375 machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
376
377 from_rip = LBR_TYPE_TSXINFO_FROMRIP(pcb->lbrs.lbrs[i].from_rip);
378 machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
379 machlbrp->lbrs[j].mispredict = LBR_TYPE_TSXINFO_MISPREDICT(pcb->lbrs.lbrs[i].from_rip);
380 if (cpuid_tsx_supported) {
381 machlbrp->lbrs[j].tsx_abort = LBR_TYPE_TSXINFO_TSX_ABORT(pcb->lbrs.lbrs[i].from_rip);
382 machlbrp->lbrs[j].in_tsx = LBR_TYPE_TSXINFO_IN_TSX(pcb->lbrs.lbrs[i].from_rip);
383 } else {
384 machlbrp->lbrs[j].tsx_abort = 0;
385 machlbrp->lbrs[j].in_tsx = 0;
386 }
387 if (i == last_entry) {
388 break;
389 }
390 }
391 break;
392
393 case PERFCAP_LBR_TYPE_EIP_WITH_LBRINFO: /* SKL+ */
394
395 machlbrp->lbr_supported_tsx = cpuid_tsx_supported ? 1 : 0;
396 machlbrp->lbr_supported_cycle_count = 1;
397 for (j = 0, i = lbr_tos;; (i = (i == 0) ? (cpu_lbr_setp->lbr_count - 1) : (i - 1)), j++) {
398 from_rip = pcb->lbrs.lbrs[i].from_rip;
399 machlbrp->lbrs[j].from_ip = lbr_mode_based_filter(from_rip, from_userspace);
400 to_rip = pcb->lbrs.lbrs[i].to_rip;
401 machlbrp->lbrs[j].to_ip = lbr_mode_based_filter(to_rip, from_userspace);
402 machlbrp->lbrs[j].mispredict = LBR_TYPE_EIP_WITH_LBRINFO_MISPREDICT(pcb->lbrs.lbrs[i].info);
403 machlbrp->lbrs[j].tsx_abort = LBR_TYPE_EIP_WITH_LBRINFO_TSX_ABORT(pcb->lbrs.lbrs[i].info);
404 machlbrp->lbrs[j].in_tsx = LBR_TYPE_EIP_WITH_LBRINFO_IN_TSX(pcb->lbrs.lbrs[i].info);
405 machlbrp->lbrs[j].cycle_count = LBR_TYPE_EIP_WITH_LBRINFO_CYC_COUNT(pcb->lbrs.lbrs[i].info);
406 if (i == last_entry) {
407 break;
408 }
409 }
410 break;
411
412 default:
413 #if DEBUG || DEVELOPMENT
414 /* This should be impossible, based on the filtering we do in i386_lbr_init() */
415 panic("Unknown LBR format: %d!", cpu_lbr_type);
416 /*NOTREACHED*/
417 #else
418 return -1;
419 #endif
420 }
421
422 return 0;
423 }
424
425 int
i386_filtered_lbr_state_to_mach_thread_state(thread_t thr_act,last_branch_state_t * machlbrp,boolean_t from_userspace)426 i386_filtered_lbr_state_to_mach_thread_state(thread_t thr_act, last_branch_state_t *machlbrp, boolean_t from_userspace)
427 {
428 boolean_t istate;
429
430 istate = ml_set_interrupts_enabled(FALSE);
431 /* If the current thread is asking for its own LBR data, synch the LBRs first */
432 if (thr_act == current_thread()) {
433 i386_lbr_synch(thr_act);
434 }
435 ml_set_interrupts_enabled(istate);
436
437 return i386_lbr_native_state_to_mach_thread_state(THREAD_TO_PCB(thr_act), machlbrp, from_userspace);
438 }
439
440 void
i386_lbr_synch(thread_t thr)441 i386_lbr_synch(thread_t thr)
442 {
443 pcb_t old_pcb = THREAD_TO_PCB(thr);
444 int i;
445
446 /* First, save current LBRs to the old thread's PCB */
447 if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
448 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
449 old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
450 old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
451 old_pcb->lbrs.lbrs[i].info = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info);
452 }
453 } else {
454 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
455 old_pcb->lbrs.lbrs[i].from_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from);
456 old_pcb->lbrs.lbrs[i].to_rip = rdmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to);
457 }
458 }
459
460 /* Finally, save the TOS */
461 old_pcb->lbrs.lbr_tos = rdmsr64(MSR_IA32_LASTBRANCH_TOS);
462 }
463
464 void
i386_switch_lbrs(thread_t old,thread_t new)465 i386_switch_lbrs(thread_t old, thread_t new)
466 {
467 pcb_t new_pcb;
468 int i;
469 bool save_old = (old != NULL && get_threadtask(old) != kernel_task);
470 bool restore_new = (get_threadtask(new) != kernel_task);
471
472 if (!save_old && !restore_new) {
473 return;
474 }
475
476 assert(cpu_lbr_setp != NULL);
477
478 new_pcb = THREAD_TO_PCB(new);
479
480 i386_lbr_disable();
481
482 if (save_old) {
483 i386_lbr_synch(old);
484 }
485
486 if (restore_new) {
487 /* Now restore the new threads's LBRs */
488 if (cpu_lbr_setp->msr_lbrs[0].msr_info != 0) {
489 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
490 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
491 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
492 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_info, new_pcb->lbrs.lbrs[i].info);
493 }
494 } else {
495 if (lbr_need_tsx_workaround) {
496 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
497 /*
498 * If TSX has been disabled, the hardware expects those two bits to be sign
499 * extensions of bit 47 (even though it didn't return them that way via the rdmsr!)
500 */
501 #define BIT_47 (1ULL << 47)
502 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from,
503 new_pcb->lbrs.lbrs[i].from_rip |
504 ((new_pcb->lbrs.lbrs[i].from_rip & BIT_47) ? 0x6000000000000000ULL : 0));
505 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to,
506 new_pcb->lbrs.lbrs[i].to_rip |
507 ((new_pcb->lbrs.lbrs[i].to_rip & BIT_47) ? 0x6000000000000000ULL : 0));
508 }
509 } else {
510 for (i = 0; i < cpu_lbr_setp->lbr_count; i++) {
511 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_from, new_pcb->lbrs.lbrs[i].from_rip);
512 wrmsr64(cpu_lbr_setp->msr_lbrs[i].msr_to, new_pcb->lbrs.lbrs[i].to_rip);
513 }
514 }
515 }
516
517 /* Lastly, restore the new threads's TOS */
518 wrmsr64(MSR_IA32_LASTBRANCH_TOS, new_pcb->lbrs.lbr_tos);
519 }
520
521 i386_lbr_enable();
522 }
523
524 void
act_machine_switch_pcb(thread_t old,thread_t new)525 act_machine_switch_pcb(thread_t old, thread_t new)
526 {
527 pcb_t pcb = THREAD_TO_PCB(new);
528 cpu_data_t *cdp = current_cpu_datap();
529 struct real_descriptor *ldtp;
530 mach_vm_offset_t pcb_stack_top;
531
532 assert(new->kernel_stack != 0);
533 assert(ml_get_interrupts_enabled() == FALSE);
534 #ifdef DIRECTION_FLAG_DEBUG
535 if (x86_get_flags() & EFL_DF) {
536 panic("Direction flag detected: 0x%lx", x86_get_flags());
537 }
538 #endif
539
540 /*
541 * Clear segment state
542 * unconditionally for DS/ES/FS but more carefully for GS whose
543 * cached state we track.
544 */
545 set_ds(NULL_SEG);
546 set_es(NULL_SEG);
547 set_fs(NULL_SEG);
548
549 if (get_gs() != NULL_SEG) {
550 swapgs(); /* switch to user's GS context */
551 set_gs(NULL_SEG);
552 swapgs(); /* and back to kernel */
553
554 /* record the active machine state lost */
555 cdp->cpu_uber.cu_user_gs_base = 0;
556 }
557
558 vm_offset_t isf;
559
560 /*
561 * Set pointer to PCB's interrupt stack frame in cpu data.
562 * Used by syscall and double-fault trap handlers.
563 */
564 isf = (vm_offset_t) &pcb->iss->ss_64.isf;
565 cdp->cpu_uber.cu_isf = isf;
566 pcb_stack_top = (vm_offset_t) (pcb->iss + 1);
567 /* require 16-byte alignment */
568 assert((pcb_stack_top & 0xF) == 0);
569
570 current_ktss64()->rsp0 = cdp->cpu_desc_index.cdi_sstku;
571 /*
572 * Top of temporary sysenter stack points to pcb stack.
573 * Although this is not normally used by 64-bit users,
574 * it needs to be set in case a sysenter is attempted.
575 */
576 *current_sstk64() = pcb_stack_top;
577
578 cdp->cd_estack = cpu_shadowp(cdp->cpu_number)->cd_estack = cdp->cpu_desc_index.cdi_sstku;
579
580 if (is_saved_state64(pcb->iss)) {
581 cdp->cpu_task_map = new->map->pmap->pm_task_map;
582
583 /*
584 * Enable the 64-bit user code segment, USER64_CS.
585 * Disable the 32-bit user code segment, USER_CS.
586 */
587 gdt_desc_p(USER64_CS)->access |= ACC_PL_U;
588 gdt_desc_p(USER_CS)->access &= ~ACC_PL_U;
589
590 /*
591 * Switch user's GS base if necessary
592 * by setting the Kernel's GS base MSR
593 * - this will become the user's on the swapgs when
594 * returning to user-space. Avoid this for
595 * kernel threads (no user TLS support required)
596 * and verify the memory shadow of the segment base
597 * in the event it was altered in user space.
598 */
599 if ((pcb->cthread_self != 0) || (get_threadtask(new) != kernel_task)) {
600 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
601 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
602 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
603 wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
604 }
605 }
606 } else {
607 cdp->cpu_task_map = TASK_MAP_32BIT;
608
609 /*
610 * Disable USER64_CS
611 * Enable USER_CS
612 */
613
614 /* It's possible that writing to the GDT areas
615 * is expensive, if the processor intercepts those
616 * writes to invalidate its internal segment caches
617 * TODO: perhaps only do this if switching bitness
618 */
619 gdt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
620 gdt_desc_p(USER_CS)->access |= ACC_PL_U;
621
622 /*
623 * Set the thread`s cthread (a.k.a pthread)
624 * For 32-bit user this involves setting the USER_CTHREAD
625 * descriptor in the LDT to point to the cthread data.
626 * The involves copying in the pre-initialized descriptor.
627 */
628 ldtp = current_ldt();
629 ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
630 if (pcb->uldt_selector != 0) {
631 ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
632 }
633 cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
634 }
635
636 cdp->cpu_curthread_do_segchk = new->machine.mthr_do_segchk;
637
638 if (last_branch_enabled_modes == LBR_ENABLED_USERMODE) {
639 i386_switch_lbrs(old, new);
640 }
641
642 /*
643 * Set the thread's LDT or LDT entry.
644 */
645 task_t task = get_threadtask_early(new);
646 if (__probable(task == TASK_NULL || task->i386_ldt == 0)) {
647 /*
648 * Use system LDT.
649 */
650 ml_cpu_set_ldt(KERNEL_LDT);
651 cdp->cpu_curtask_has_ldt = 0;
652 } else {
653 /*
654 * Task has its own LDT.
655 */
656 user_ldt_set(new);
657 cdp->cpu_curtask_has_ldt = 1;
658 }
659 }
660
661 kern_return_t
thread_set_wq_state32(thread_t thread,thread_state_t tstate)662 thread_set_wq_state32(thread_t thread, thread_state_t tstate)
663 {
664 x86_thread_state32_t *state;
665 x86_saved_state32_t *saved_state;
666 thread_t curth = current_thread();
667 spl_t s = 0;
668
669 pal_register_cache_state(thread, DIRTY);
670
671 saved_state = USER_REGS32(thread);
672
673 state = (x86_thread_state32_t *)tstate;
674
675 if (curth != thread) {
676 s = splsched();
677 thread_lock(thread);
678 }
679
680 saved_state->ebp = 0;
681 saved_state->eip = state->eip;
682 saved_state->eax = state->eax;
683 saved_state->ebx = state->ebx;
684 saved_state->ecx = state->ecx;
685 saved_state->edx = state->edx;
686 saved_state->edi = state->edi;
687 saved_state->esi = state->esi;
688 saved_state->uesp = state->esp;
689 saved_state->efl = EFL_USER_SET;
690
691 saved_state->cs = USER_CS;
692 saved_state->ss = USER_DS;
693 saved_state->ds = USER_DS;
694 saved_state->es = USER_DS;
695
696 if (curth != thread) {
697 thread_unlock(thread);
698 splx(s);
699 }
700
701 return KERN_SUCCESS;
702 }
703
704
705 kern_return_t
thread_set_wq_state64(thread_t thread,thread_state_t tstate)706 thread_set_wq_state64(thread_t thread, thread_state_t tstate)
707 {
708 x86_thread_state64_t *state;
709 x86_saved_state64_t *saved_state;
710 thread_t curth = current_thread();
711 spl_t s = 0;
712
713 saved_state = USER_REGS64(thread);
714 state = (x86_thread_state64_t *)tstate;
715
716 /* Disallow setting non-canonical PC or stack */
717 if (!IS_USERADDR64_CANONICAL(state->rsp) ||
718 !IS_USERADDR64_CANONICAL(state->rip)) {
719 return KERN_FAILURE;
720 }
721
722 pal_register_cache_state(thread, DIRTY);
723
724 if (curth != thread) {
725 s = splsched();
726 thread_lock(thread);
727 }
728
729 saved_state->rbp = 0;
730 saved_state->rdi = state->rdi;
731 saved_state->rsi = state->rsi;
732 saved_state->rdx = state->rdx;
733 saved_state->rcx = state->rcx;
734 saved_state->r8 = state->r8;
735 saved_state->r9 = state->r9;
736
737 saved_state->isf.rip = state->rip;
738 saved_state->isf.rsp = state->rsp;
739 saved_state->isf.cs = USER64_CS;
740 saved_state->isf.rflags = EFL_USER_SET;
741
742 if (curth != thread) {
743 thread_unlock(thread);
744 splx(s);
745 }
746
747 return KERN_SUCCESS;
748 }
749
750 /*
751 * Initialize the machine-dependent state for a new thread.
752 */
753 void
machine_thread_create(thread_t thread,task_t task,bool first_thread __unused)754 machine_thread_create(
755 thread_t thread,
756 task_t task,
757 bool first_thread __unused)
758 {
759 pcb_t pcb = THREAD_TO_PCB(thread);
760
761 if ((task->t_flags & TF_TECS) || __improbable(force_thread_policy_tecs)) {
762 thread->machine.mthr_do_segchk = MTHR_SEGCHK;
763 } else {
764 thread->machine.mthr_do_segchk = 0;
765 }
766
767 if (task != kernel_task &&
768 __improbable((cpuid_wa_required(CPU_INTEL_RSBST) & CWA_ON) != 0)) {
769 thread->machine.mthr_do_segchk |= MTHR_RSBST;
770 }
771
772 /*
773 * Allocate save frame only if required.
774 */
775 if (pcb->iss == NULL) {
776 assert((get_preemption_level() == 0));
777 pcb->iss = zalloc_flags(iss_zone, Z_WAITOK | Z_NOFAIL);
778 }
779
780 /*
781 * Ensure that the synthesized 32-bit state including
782 * the 64-bit interrupt state can be acommodated in the
783 * 64-bit state we allocate for both 32-bit and 64-bit threads.
784 */
785 assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <=
786 sizeof(pcb->iss->ss_64));
787
788 bzero((char *)pcb->iss, sizeof(x86_saved_state_t));
789
790 bzero(&pcb->lbrs, sizeof(x86_lbrs_t));
791
792 if (task_has_64Bit_addr(task)) {
793 pcb->iss->flavor = x86_SAVED_STATE64;
794
795 pcb->iss->ss_64.isf.cs = USER64_CS;
796 pcb->iss->ss_64.isf.ss = USER_DS;
797 pcb->iss->ss_64.fs = USER_DS;
798 pcb->iss->ss_64.gs = USER_DS;
799 pcb->iss->ss_64.isf.rflags = EFL_USER_SET;
800 } else {
801 pcb->iss->flavor = x86_SAVED_STATE32;
802
803 pcb->iss->ss_32.cs = USER_CS;
804 pcb->iss->ss_32.ss = USER_DS;
805 pcb->iss->ss_32.ds = USER_DS;
806 pcb->iss->ss_32.es = USER_DS;
807 pcb->iss->ss_32.fs = USER_DS;
808 pcb->iss->ss_32.gs = USER_DS;
809 pcb->iss->ss_32.efl = EFL_USER_SET;
810 }
811
812 simple_lock_init(&pcb->lock, 0);
813
814 pcb->cthread_self = 0;
815 pcb->uldt_selector = 0;
816 pcb->thread_gpu_ns = 0;
817 /* Ensure that the "cthread" descriptor describes a valid
818 * segment.
819 */
820 if ((pcb->cthread_desc.access & ACC_P) == 0) {
821 pcb->cthread_desc = *gdt_desc_p(USER_DS);
822 }
823
824
825 pcb->insn_state_copyin_failure_errorcode = 0;
826 if (pcb->insn_state != 0) { /* Reinit for new thread */
827 bzero(pcb->insn_state, sizeof(x86_instruction_state_t));
828 pcb->insn_state->insn_stream_valid_bytes = -1;
829 }
830
831 pcb->insn_copy_optout = (task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
832 }
833
834 /*
835 * Machine-dependent cleanup prior to destroying a thread
836 */
837 void
machine_thread_destroy(thread_t thread)838 machine_thread_destroy(
839 thread_t thread)
840 {
841 pcb_t pcb = THREAD_TO_PCB(thread);
842
843 #if HYPERVISOR
844 if (thread->hv_thread_target) {
845 hv_callbacks.thread_destroy(thread->hv_thread_target);
846 thread->hv_thread_target = NULL;
847 }
848 #endif
849
850 if (pcb->ifps != 0) {
851 fpu_free(thread, pcb->ifps);
852 }
853 if (pcb->iss != 0) {
854 zfree(iss_zone, pcb->iss);
855 pcb->iss = 0;
856 }
857 if (pcb->ids) {
858 zfree(ids_zone, pcb->ids);
859 pcb->ids = NULL;
860 }
861
862 if (pcb->insn_state != 0) {
863 kfree_data(pcb->insn_state, sizeof(x86_instruction_state_t));
864 pcb->insn_state = 0;
865 }
866 pcb->insn_state_copyin_failure_errorcode = 0;
867 pcb->insn_copy_optout = false;
868 }
869
870 /*
871 * machine_thread_process_signature
872 *
873 * Called to allow code signature dependent adjustments to the thread
874 * state. Note that this is usually called twice for the main thread:
875 * Once at thread creation by thread_create, when the signature is
876 * potentially not attached yet (which is usually the case for the
877 * first/main thread of a task), and once after the task's signature
878 * has actually been attached.
879 *
880 */
881 kern_return_t
machine_thread_process_signature(thread_t __unused thread,task_t __unused task)882 machine_thread_process_signature(thread_t __unused thread, task_t __unused task)
883 {
884 return KERN_SUCCESS;
885 }
886
887 kern_return_t
machine_thread_set_tsd_base(thread_t thread,mach_vm_offset_t tsd_base)888 machine_thread_set_tsd_base(
889 thread_t thread,
890 mach_vm_offset_t tsd_base)
891 {
892 if (get_threadtask(thread) == kernel_task) {
893 return KERN_INVALID_ARGUMENT;
894 }
895
896 if (thread_is_64bit_addr(thread)) {
897 /* check for canonical address, set 0 otherwise */
898 if (!IS_USERADDR64_CANONICAL(tsd_base)) {
899 tsd_base = 0ULL;
900 }
901 } else {
902 if (tsd_base > UINT32_MAX) {
903 tsd_base = 0ULL;
904 }
905 }
906
907 pcb_t pcb = THREAD_TO_PCB(thread);
908 pcb->cthread_self = tsd_base;
909
910 if (!thread_is_64bit_addr(thread)) {
911 /* Set up descriptor for later use */
912 struct real_descriptor desc = {
913 .limit_low = 1,
914 .limit_high = 0,
915 .base_low = tsd_base & 0xffff,
916 .base_med = (tsd_base >> 16) & 0xff,
917 .base_high = (tsd_base >> 24) & 0xff,
918 .access = ACC_P | ACC_PL_U | ACC_DATA_W,
919 .granularity = SZ_32 | SZ_G,
920 };
921
922 pcb->cthread_desc = desc;
923 saved_state32(pcb->iss)->gs = USER_CTHREAD;
924 }
925
926 /* For current thread, make the TSD base active immediately */
927 if (thread == current_thread()) {
928 if (thread_is_64bit_addr(thread)) {
929 cpu_data_t *cdp;
930
931 mp_disable_preemption();
932 cdp = current_cpu_datap();
933 if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
934 (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
935 wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base);
936 }
937 cdp->cpu_uber.cu_user_gs_base = tsd_base;
938 mp_enable_preemption();
939 } else {
940 /* assign descriptor */
941 mp_disable_preemption();
942 *ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc;
943 mp_enable_preemption();
944 }
945 }
946
947 return KERN_SUCCESS;
948 }
949
950 void
machine_tecs(thread_t thr)951 machine_tecs(thread_t thr)
952 {
953 if (tecs_mode_supported) {
954 thr->machine.mthr_do_segchk = 1;
955 }
956 }
957
958 void
machine_thread_set_insn_copy_optout(thread_t thr)959 machine_thread_set_insn_copy_optout(thread_t thr)
960 {
961 thr->machine.insn_copy_optout = true;
962 }
963
964 int
machine_csv(cpuvn_e cve)965 machine_csv(cpuvn_e cve)
966 {
967 switch (cve) {
968 case CPUVN_CI:
969 return (cpuid_wa_required(CPU_INTEL_SEGCHK) & CWA_ON) != 0;
970
971 default:
972 break;
973 }
974
975 return 0;
976 }
977