xref: /xnu-8020.121.3/osfmk/i386/cpu_data.h (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  *
31  */
32 
33 #ifndef I386_CPU_DATA
34 #define I386_CPU_DATA
35 
36 #include <mach_assert.h>
37 #include <machine/atomic.h>
38 
39 #include <kern/assert.h>
40 #include <kern/kern_types.h>
41 #include <kern/mpqueue.h>
42 #include <kern/queue.h>
43 #include <kern/processor.h>
44 #include <kern/pms.h>
45 #include <pexpert/pexpert.h>
46 #include <mach/i386/thread_status.h>
47 #include <mach/i386/vm_param.h>
48 #include <i386/locks.h>
49 #include <i386/rtclock_protos.h>
50 #include <i386/pmCPU.h>
51 #include <i386/cpu_topology.h>
52 #include <i386/seg.h>
53 #include <i386/mp.h>
54 
55 #if CONFIG_VMX
56 #include <i386/vmx/vmx_cpu.h>
57 #endif
58 
59 #if MONOTONIC
60 #include <machine/monotonic.h>
61 #endif /* MONOTONIC */
62 
63 #include <san/kcov_data.h>
64 
65 #include <machine/pal_routines.h>
66 
67 /*
68  * Data structures referenced (anonymously) from per-cpu data:
69  */
70 struct cpu_cons_buffer;
71 struct cpu_desc_table;          /* pointee type of cpu_data_t.cpu_desc_tablep */
72 struct mca_state;               /* pointee type of cpu_data_t.cpu_mca_state (CONFIG_MCA only) */
73 struct prngContext;
74 
75 /*
76  * Data structures embedded in per-cpu data:
77  */
/* Timer state; one instance is embedded in cpu_data_t as the rtclock_timer member. */
78 typedef struct rtclock_timer {
79 	mpqueue_head_t          queue;          /* queue head (type from kern/mpqueue.h) */
80 	uint64_t                deadline;       /* NOTE(review): presumably the pending deadline; units not visible here -- confirm */
81 	uint64_t                when_set;       /* NOTE(review): presumably the time at which deadline was programmed -- confirm */
82 	boolean_t               has_expired;
83 } rtclock_timer_t;
84 
/*
 * Addresses of this CPU's descriptor structures, kept in 'u'/'b' pairs.
 * Embedded in cpu_data_t as the cpu_desc_index member.
 * NOTE(review): the 'b' suffix is not explained in this file; presumably the
 * non-double-mapped (base) address of the same object -- confirm.
 */
85 typedef struct {
86 	/* The 'u' suffixed fields store the double-mapped descriptor addresses */
87 	struct x86_64_tss       *cdi_ktssu;     /* TSS */
88 	struct x86_64_tss       *cdi_ktssb;
89 	x86_64_desc_register_t  cdi_gdtu;       /* GDT descriptor register image */
90 	x86_64_desc_register_t  cdi_gdtb;
91 	x86_64_desc_register_t  cdi_idtu;       /* IDT descriptor register image */
92 	x86_64_desc_register_t  cdi_idtb;
93 	struct real_descriptor  *cdi_ldtu;      /* LDT */
94 	struct real_descriptor  *cdi_ldtb;
95 	vm_offset_t             cdi_sstku;      /* NOTE(review): "sstk" is not defined in this file -- confirm meaning */
96 	vm_offset_t             cdi_sstkb;
97 } cpu_desc_index_t;
98 
/* Address-space flavour of the current task; type of cpu_data_t.cpu_task_map. */
99 typedef enum {
100 	TASK_MAP_32BIT,                 /* 32-bit user, compatibility mode */
101 	TASK_MAP_64BIT,                 /* 64-bit user thread, shared space */
102 } task_map_t;
103 
104 
105 /*
106  * This structure is used on entry into the (uber-)kernel on syscall from
107  * a 64-bit user. It contains the address of the machine state save area
108  * for the current thread and a temporary place to save the user's rsp
109  * before loading this address into rsp.
110  */
/* Syscall-entry scratch area; embedded in cpu_data_t as the cpu_uber member. */
111 typedef struct {
112 	addr64_t        cu_isf;         /* thread->pcb->iss.isf */
113 	uint64_t        cu_tmp;         /* temporary scratch */
114 	addr64_t        cu_user_gs_base; /* NOTE(review): presumably the user GS base held while in the kernel -- confirm */
115 } cpu_uber_t;
116 
117 typedef uint16_t        pcid_t;         /* type of the cpu_*_pcid fields in cpu_data_t */
118 typedef uint8_t         pcid_ref_t;     /* element type of pcid_cdata_t.cpu_pcid_refcounts[] */
119 
120 #define CPU_RTIME_BINS (12)                     /* length of cpu_data_t.cpu_rtimes[] */
121 #define CPU_ITIME_BINS (CPU_RTIME_BINS)         /* length of cpu_data_t.cpu_itimes[] */
122 
/* Capacity of the backtrace arrays in plrecord_t, iotrace_entry_t and traptrace_entry_t. */
123 #define MAX_TRACE_BTFRAMES (16)
/* One preemption-level trace record; written by pltrace_internal(). */
124 typedef struct {
125 	boolean_t pltype;                       /* the 'enable' argument given to pltrace_internal() */
126 	int plevel;                             /* cpu_preemption_level sampled when the record was taken */
127 	uint64_t plbt[MAX_TRACE_BTFRAMES];      /* return addresses captured by rbtrace_bt() */
128 } plrecord_t;
129 
130 #if     DEVELOPMENT || DEBUG
/* Kind of access stored in iotrace_entry_t.iotype; enumerators start at 1. */
131 typedef enum {
132 	IOTRACE_PHYS_READ = 1,
133 	IOTRACE_PHYS_WRITE,
134 	IOTRACE_IO_READ,
135 	IOTRACE_IO_WRITE,
136 	IOTRACE_PORTIO_READ,
137 	IOTRACE_PORTIO_WRITE
138 } iotrace_type_e;
139 
/* One slot of the per-cpu I/O trace ring; every field is filled by iotrace(). */
140 typedef struct {
141 	iotrace_type_e  iotype;                 /* 'type' argument */
142 	int             size;                   /* 'size' argument */
143 	uint64_t        vaddr;                  /* 'vaddr' argument */
144 	uint64_t        paddr;                  /* 'paddr' argument */
145 	uint64_t        val;                    /* 'val' argument */
146 	uint64_t        start_time_abs;         /* 'sabs' argument */
147 	uint64_t        duration;               /* 'duration' argument */
148 	uint64_t        backtrace[MAX_TRACE_BTFRAMES]; /* captured by rbtrace_bt() */
149 } iotrace_entry_t;
150 
/* One slot of the per-cpu trap trace ring; filled by traptrace_start(), completed by traptrace_end(). */
151 typedef struct {
152 	int             vector;                 /* Vector number of interrupt */
153 	thread_t        curthread;              /* Current thread at the time of the interrupt */
154 	uint64_t        interrupted_pc;         /* 'ipc' argument of traptrace_start() */
155 	int             curpl;                  /* Current preemption level */
156 	int             curil;                  /* Current interrupt level */
157 	uint64_t        start_time_abs;         /* 'sabs' argument of traptrace_start() */
158 	uint64_t        duration;               /* ~0ULL until traptrace_end() stores eabs - start_time_abs */
159 	uint64_t        backtrace[MAX_TRACE_BTFRAMES]; /* captured by rbtrace_bt() */
160 } traptrace_entry_t;
161 
162 #define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
163 #define IOTRACE_MAX_ENTRIES_PER_CPU (256)
164 extern volatile int mmiotrace_enabled;          /* iotrace() records nothing while this is 0 */
165 extern uint32_t iotrace_entries_per_cpu;        /* ring length used by iotrace() to wrap its index; 0 disables recording */
166 PERCPU_DECL(uint32_t, iotrace_next);            /* index of the next ring slot iotrace() will use */
167 PERCPU_DECL(iotrace_entry_t * __unsafe_indexable, iotrace_ring); /* base of this CPU's ring */
168 
169 #define TRAPTRACE_INVALID_INDEX (~0U)           /* returned by traptrace_start() when tracing is off; ignored by traptrace_end() */
170 #define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
171 #define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
172 extern volatile int traptrace_enabled;          /* traptrace_start() records nothing while this is 0 */
173 extern uint32_t traptrace_entries_per_cpu;      /* ring length used by traptrace_start() to wrap its index; 0 disables recording */
174 PERCPU_DECL(uint32_t, traptrace_next);          /* index of the next ring slot traptrace_start() will use */
175 PERCPU_DECL(traptrace_entry_t * __unsafe_indexable, traptrace_ring); /* base of this CPU's ring */
176 #endif /* DEVELOPMENT || DEBUG */
177 
178 /*
179  * Per-cpu data.
180  *
181  * Each processor has a per-cpu data area which is dereferenced through the
182  * current_cpu_datap() macro. For speed, the %gs segment is based here, and
183  * using this, inlines provide single-instruction access to frequently used
184  * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/
185  * current_thread().
186  *
187  * Cpu data owned by another processor can be accessed using the
188  * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu
189  * pointers.
190  */
/* PCID bookkeeping tables; cpu_data_t.cpu_pcid_data points at one of these. */
191 typedef struct {
192 	pcid_t                  cpu_pcid_free_hint;
193 #define PMAP_PCID_MAX_PCID      (0x800)         /* number of entries in each table below */
194 	pcid_ref_t              cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
195 	pmap_t                  cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
196 } pcid_cdata_t;
197 
/*
 * Per-CPU data area. The %gs-relative accessors in this header (CPU_DATA(),
 * current_cpu_datap(), get_cpu_number(), ...) read the current CPU's
 * instance; another CPU's instance is reached through cpu_datap().
 */
198 typedef struct cpu_data {
199 	struct pal_cpu_data     cpu_pal_data;           /* PAL-specific data */
200 #define                         cpu_pd cpu_pal_data     /* convenience alias */
201 	struct cpu_data         *cpu_this;              /* pointer to myself */
202 	vm_offset_t             cpu_pcpu_base;          /* returned by get_current_percpu_base() */
203 	thread_t                cpu_active_thread;      /* returned by get_active_thread() */
204 	thread_t                cpu_nthread;
205 	int                     cpu_number;             /* Logical CPU */
206 	void                    *cpu_int_state;         /* interrupt state */
207 	vm_offset_t             cpu_active_stack;       /* kernel stack base */
208 	vm_offset_t             cpu_kernel_stack;       /* kernel stack top */
209 	vm_offset_t             cpu_int_stack_top;      /* rbtrace_bt() treats this as the top of an INTSTACK_SIZE region */
210 	volatile int            cpu_signals;            /* IPI events */
211 	volatile int            cpu_prior_signals;      /* Last set of events,
212 	                                                 * debugging
213 	                                                 */
214 	ast_t                   cpu_pending_ast;
215 	/*
216 	 * Note if rearranging fields:
217 	 * We want cpu_preemption_level on a different
218 	 * cache line than cpu_active_thread
219 	 * for optimizing mtx_spin phase.
220 	 */
221 	int                     cpu_interrupt_level;    /* returned by get_interrupt_level() */
222 	volatile int            cpu_preemption_level;   /* adjusted by the enable/disable preemption inlines */
223 	volatile int            cpu_running;            /* tested by cpu_is_running() */
224 #if !MONOTONIC
225 	boolean_t               cpu_fixed_pmcs_enabled;
226 #endif /* !MONOTONIC */
227 	rtclock_timer_t         rtclock_timer;
228 	volatile addr64_t       cpu_active_cr3 __attribute((aligned(64)));
229 	union {
230 		volatile uint32_t cpu_tlb_invalid;      /* both 16-bit halves below viewed as one word */
231 		struct {
232 			volatile uint16_t cpu_tlb_invalid_local;
233 			volatile uint16_t cpu_tlb_invalid_global;
234 		};
235 	};
236 	uint64_t                cpu_ip_desc[2];
237 	volatile task_map_t     cpu_task_map;
238 	volatile addr64_t       cpu_task_cr3;
239 	addr64_t                cpu_kernel_cr3;
240 	volatile addr64_t       cpu_ucr3;
241 	volatile addr64_t       cpu_shadowtask_cr3;
242 	boolean_t               cpu_pagezero_mapped;
243 	cpu_uber_t              cpu_uber;
244 /* Double-mapped per-CPU exception stack address */
245 	uintptr_t               cd_estack;
246 	int                     cpu_xstate;
247 	int                     cpu_curtask_has_ldt;
248 	int                     cpu_curthread_do_segchk;
249 /* Address of shadowed, partially mirrored CPU data structures located
250  * in the double mapped PML4
251  */
252 	void                    *cd_shadow;             /* returned by cpu_shadowp() */
253 	union {
254 		volatile uint32_t cpu_tlb_invalid_count; /* both 16-bit halves below viewed as one word */
255 		struct {
256 			volatile uint16_t cpu_tlb_invalid_local_count;
257 			volatile uint16_t cpu_tlb_invalid_global_count;
258 		};
259 	};
260 
261 	uint16_t                cpu_tlb_gen_counts_local[MAX_CPUS];
262 	uint16_t                cpu_tlb_gen_counts_global[MAX_CPUS];
263 
264 	struct processor        *cpu_processor;
265 	struct real_descriptor  *cpu_ldtp;
266 	struct cpu_desc_table   *cpu_desc_tablep;
267 	cpu_desc_index_t        cpu_desc_index;
268 	int                     cpu_ldt;
269 
270 #define HWINTCNT_SIZE 256
271 	uint32_t                cpu_hwIntCnt[HWINTCNT_SIZE];    /* Interrupt counts */
272 	uint64_t                cpu_hwIntpexits[HWINTCNT_SIZE];
273 	uint64_t                cpu_dr7; /* debug control register */
274 	uint64_t                cpu_int_event_time;     /* intr entry/exit time */
275 	pal_rtc_nanotime_t      *cpu_nanotime;          /* Nanotime info */
276 #if KPC
277 	/* double-buffered performance counter data */
278 	uint64_t                *cpu_kpc_buf[2];
279 	/* PMC shadow and reload value buffers */
280 	uint64_t                *cpu_kpc_shadow;
281 	uint64_t                *cpu_kpc_reload;
282 #endif
283 #if MONOTONIC
284 	struct mt_cpu cpu_monotonic;
285 #endif /* MONOTONIC */
286 	uint32_t                cpu_pmap_pcid_enabled;
287 	pcid_t                  cpu_active_pcid;
288 	pcid_t                  cpu_last_pcid;
289 	pcid_t                  cpu_kernel_pcid;
290 	volatile pcid_ref_t     *cpu_pmap_pcid_coherentp;
291 	volatile pcid_ref_t     *cpu_pmap_pcid_coherentp_kernel;
292 	pcid_cdata_t            *cpu_pcid_data;
293 #ifdef  PCID_STATS
294 	uint64_t                cpu_pmap_pcid_flushes;
295 	uint64_t                cpu_pmap_pcid_preserves;
296 #endif
297 	uint64_t                cpu_aperf;
298 	uint64_t                cpu_mperf;
299 	uint64_t                cpu_c3res;
300 	uint64_t                cpu_c6res;
301 	uint64_t                cpu_c7res;
302 	uint64_t                cpu_itime_total;
303 	uint64_t                cpu_rtime_total;
304 	uint64_t                cpu_ixtime;
305 	uint64_t                cpu_idle_exits;
306 	/*
307 	 * Note that the cacheline-copy mechanism uses the cpu_rtimes field in the shadow CPU
308 	 * structures to temporarily stash the code cacheline that includes the instruction
309 	 * pointer at the time of the fault (this field is otherwise unused in the shadow
310 	 * CPU structures).
311 	 */
312 	uint64_t                cpu_rtimes[CPU_RTIME_BINS];
313 	uint64_t                cpu_itimes[CPU_ITIME_BINS];
314 #if !MONOTONIC
315 	uint64_t                cpu_cur_insns;
316 	uint64_t                cpu_cur_ucc;
317 	uint64_t                cpu_cur_urc;
318 #endif /* !MONOTONIC */
319 	uint64_t                cpu_gpmcs[4];
320 	uint64_t                cpu_max_observed_int_latency;
321 	int                     cpu_max_observed_int_latency_vector;
322 	volatile boolean_t      cpu_NMI_acknowledged;
323 	uint64_t                debugger_entry_time;
324 	uint64_t                debugger_ipi_time;
325 	/* A separate nested interrupt stack flag, to account
326 	 * for non-nested interrupts arriving while on the interrupt stack
327 	 * Currently only occurs when AICPM enables interrupts on the
328 	 * interrupt stack during processor offlining.
329 	 */
330 	uint32_t                cpu_nested_istack;
331 	uint32_t                cpu_nested_istack_events;
332 	x86_saved_state64_t     *cpu_fatal_trap_state;
333 	x86_saved_state64_t     *cpu_post_fatal_trap_state;
334 #if CONFIG_VMX
335 	vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
336 #endif
337 #if CONFIG_MCA
338 	struct mca_state        *cpu_mca_state;         /* State at MC fault */
339 #endif
340 	int                     cpu_type;
341 	int                     cpu_subtype;
342 	int                     cpu_threadtype;
343 	boolean_t               cpu_iflag;
344 	boolean_t               cpu_boot_complete;
345 	int                     cpu_hibernate;
346 #define MAX_PREEMPTION_RECORDS (8)
347 #if     DEVELOPMENT || DEBUG
348 	int                     cpu_plri;               /* next plrecords[] slot; wrapped by pltrace_internal() */
349 	plrecord_t              plrecords[MAX_PREEMPTION_RECORDS]; /* preemption-level trace ring */
350 #endif
351 	struct x86_lcpu         lcpu;
352 	int                     cpu_phys_number;        /* Physical CPU */
353 	cpu_id_t                cpu_id;                 /* Platform Expert */
354 #if DEBUG
355 	uint64_t                cpu_entry_cr3;
356 	uint64_t                cpu_exit_cr3;
357 	uint64_t                cpu_pcid_last_cr3;
358 #endif
359 	boolean_t               cpu_rendezvous_in_progress;
360 #if CST_DEMOTION_DEBUG
361 	/* Count of thread wakeups issued by this processor */
362 	uint64_t                cpu_wakeups_issued_total;
363 #endif
364 #if DEBUG || DEVELOPMENT
365 	uint64_t                tsc_sync_delta;
366 #endif
367 	uint32_t                cpu_soft_apic_lvt_timer;
368 #if CONFIG_KCOV
369 	kcov_cpu_data_t         cpu_kcov_data;
370 #endif
371 } cpu_data_t;
372 
/* Table of per-cpu data pointers, indexed by cpu number in cpu_datap() and cpu_shadowp(). */
373 extern cpu_data_t *__single cpu_data_ptr[MAX_CPUS];
374 
375 /*
376  * __SEG_GS marks %gs-relative operations:
377  *   https://clang.llvm.org/docs/LanguageExtensions.html#memory-references-to-specified-segments
378  *   https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html#x86-Named-Address-Spaces
379  */
380 #if defined(__SEG_GS)
381 // __seg_gs exists
382 #elif defined(__clang__)
/* Fallback spelling: this header maps __seg_gs onto clang address space 256. */
383 #define __seg_gs __attribute__((address_space(256)))
384 #else
385 #error use a compiler that supports address spaces or __seg_gs
386 #endif
387 
/*
 * A cpu_data_t pointer with value 0 in the __seg_gs address space; member
 * accesses through it are the %gs-relative operations used by the inline
 * accessors in this header.
 */
388 #define CPU_DATA()            ((cpu_data_t __seg_gs *)0UL)
389 
390 /*
391  * Everyone within the osfmk part of the kernel can use the fast
392  * inline versions of these routines.  Everyone outside must call
393  * the real thing.
394  */
395 
396 
397 /*
398  * The "volatile" flavor of current_thread() is intended for use by
399  * scheduler code which may need to update the thread pointer in the
400  * course of a context switch.  Any call to current_thread() made
401  * prior to the thread pointer update should be safe to optimize away
402  * as it should be consistent with that thread's state to the extent
403  * the compiler can reason about it.  Likewise, the context switch
404  * path will eventually result in an arbitrary branch to the new
405  * thread's pc, about which the compiler won't be able to reason.
406  * Thus any compile-time optimization of current_thread() calls made
407  * within the new thread should be safely encapsulated in its
408  * register/stack state.  The volatile form therefore exists to cover
409  * the window between the thread pointer update and the branch to
410  * the new pc.
411  */
412 static inline thread_t
get_active_thread_volatile(void)413 get_active_thread_volatile(void)
414 {
415 	return CPU_DATA()->cpu_active_thread;
416 }
417 
418 static inline __attribute__((const)) thread_t
get_active_thread(void)419 get_active_thread(void)
420 {
421 	return CPU_DATA()->cpu_active_thread;
422 }
423 
/* Aliases for the two %gs-relative thread accessors defined in this header. */
424 #define current_thread_fast()           get_active_thread()
425 #define current_thread_volatile()       get_active_thread_volatile()
426 
/* Always expands to TRUE in this header. */
427 #define cpu_mode_is64bit()              TRUE
428 
429 static inline int
get_preemption_level(void)430 get_preemption_level(void)
431 {
432 	return CPU_DATA()->cpu_preemption_level;
433 }
434 static inline int
get_interrupt_level(void)435 get_interrupt_level(void)
436 {
437 	return CPU_DATA()->cpu_interrupt_level;
438 }
439 static inline int
get_cpu_number(void)440 get_cpu_number(void)
441 {
442 	return CPU_DATA()->cpu_number;
443 }
444 static inline vm_offset_t
get_current_percpu_base(void)445 get_current_percpu_base(void)
446 {
447 	return CPU_DATA()->cpu_pcpu_base;
448 }
449 static inline int
get_cpu_phys_number(void)450 get_cpu_phys_number(void)
451 {
452 	return CPU_DATA()->cpu_phys_number;
453 }
454 
455 static inline cpu_data_t *
current_cpu_datap(void)456 current_cpu_datap(void)
457 {
458 	return CPU_DATA()->cpu_this;
459 }
460 
461 /*
462  * Facility to diagnose preemption-level imbalances, which are otherwise
463  * challenging to debug. On each operation that enables or disables preemption,
464  * we record a backtrace into a per-CPU ring buffer, along with the current
465  * preemption level and operation type. Thus, if an imbalance is observed,
466  * one can examine these per-CPU records to determine which codepath failed
467  * to re-enable preemption, enabled preemption without a corresponding
468  * disablement etc. The backtracer determines which stack is currently active,
469  * and uses that to perform bounds checks on unterminated stacks.
470  * To enable, sysctl -w machdep.pltrace=1 on DEVELOPMENT or DEBUG kernels (DRK '15)
471  * The bounds check currently doesn't account for non-default thread stack sizes.
472  */
473 #if DEVELOPMENT || DEBUG
474 static inline void
rbtrace_bt(uint64_t * __counted_by (maxframes)rets,int maxframes,cpu_data_t * cdata,uint64_t frameptr,bool use_cursp)475 rbtrace_bt(uint64_t *__counted_by(maxframes)rets, int maxframes,
476     cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
477 {
478 	extern uint32_t         low_intstack[];         /* bottom */
479 	extern uint32_t         low_eintstack[];        /* top */
480 	extern char             mp_slave_stack[];
481 	int                     btidx = 0;
482 
483 	uint64_t kstackb, kstackt;
484 
485 	/* Obtain the 'current' program counter, initial backtrace
486 	 * element. This will also indicate if we were unable to
487 	 * trace further up the stack for some reason
488 	 */
489 	if (use_cursp) {
490 		__asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
491                      : "=m" (rets[btidx++])
492                      :
493                      : "rax");
494 	}
495 
496 	thread_t cplthread = cdata->cpu_active_thread;
497 	if (cplthread) {
498 		uintptr_t csp;
499 		if (use_cursp == true) {
500 			__asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
501 		} else {
502 			csp = frameptr;
503 		}
504 		/* Determine which stack we're on to populate stack bounds.
505 		 * We don't need to trace across stack boundaries for this
506 		 * routine.
507 		 */
508 		kstackb = cdata->cpu_active_stack;
509 		kstackt = kstackb + KERNEL_STACK_SIZE;
510 		if (csp < kstackb || csp > kstackt) {
511 			kstackt = cdata->cpu_kernel_stack;
512 			kstackb = kstackt - KERNEL_STACK_SIZE;
513 			if (csp < kstackb || csp > kstackt) {
514 				kstackt = cdata->cpu_int_stack_top;
515 				kstackb = kstackt - INTSTACK_SIZE;
516 				if (csp < kstackb || csp > kstackt) {
517 					kstackt = (uintptr_t)&low_eintstack;
518 					kstackb = kstackt - INTSTACK_SIZE;
519 					if (csp < kstackb || csp > kstackt) {
520 						kstackb = (uintptr_t)&mp_slave_stack;
521 						kstackt = kstackb + PAGE_SIZE;
522 					} else {
523 						kstackb = 0;
524 						kstackt = 0;
525 					}
526 				}
527 			}
528 		}
529 
530 		if (__probable(kstackb && kstackt)) {
531 			uint64_t *cfp = __unsafe_forge_single(uint64_t *, frameptr);
532 			int rbbtf;
533 
534 			for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
535 				if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
536 					rets[rbbtf] = 0;
537 					continue;
538 				}
539 				rets[rbbtf] = *(cfp + 1);
540 				cfp = __unsafe_forge_single(uint64_t *, *cfp);
541 			}
542 		}
543 	}
544 }
545 
546 __attribute__((noinline))
547 static inline void
pltrace_internal(boolean_t enable)548 pltrace_internal(boolean_t enable)
549 {
550 	cpu_data_t *cdata = current_cpu_datap();
551 	int cpli = cdata->cpu_preemption_level;
552 	int cplrecord = cdata->cpu_plri;
553 	uint64_t *plbts;
554 
555 	assert(cpli >= 0);
556 
557 	cdata->plrecords[cplrecord].pltype = enable;
558 	cdata->plrecords[cplrecord].plevel = cpli;
559 
560 	plbts = &cdata->plrecords[cplrecord].plbt[0];
561 
562 	cplrecord++;
563 
564 	if (cplrecord >= MAX_PREEMPTION_RECORDS) {
565 		cplrecord = 0;
566 	}
567 
568 	cdata->cpu_plri = cplrecord;
569 
570 	rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), false);
571 }
572 
/* pltrace() calls pltrace_internal() only while this is non-zero. */
573 extern int plctrace_enabled;
574 
575 static inline void
iotrace(iotrace_type_e type,uint64_t vaddr,uint64_t paddr,int size,uint64_t val,uint64_t sabs,uint64_t duration)576 iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t val,
577     uint64_t sabs, uint64_t duration)
578 {
579 	cpu_data_t *cdata;
580 	uint32_t nextidx;
581 	iotrace_entry_t *cur_iotrace_ring;
582 	uint32_t *nextidxp;
583 
584 	if (__improbable(mmiotrace_enabled == 0 || iotrace_entries_per_cpu == 0)) {
585 		return;
586 	}
587 
588 	cdata = current_cpu_datap();
589 	nextidxp = PERCPU_GET(iotrace_next);
590 	nextidx = *nextidxp;
591 	cur_iotrace_ring = *PERCPU_GET(iotrace_ring);
592 
593 	cur_iotrace_ring[nextidx].iotype = type;
594 	cur_iotrace_ring[nextidx].vaddr = vaddr;
595 	cur_iotrace_ring[nextidx].paddr = paddr;
596 	cur_iotrace_ring[nextidx].size = size;
597 	cur_iotrace_ring[nextidx].val = val;
598 	cur_iotrace_ring[nextidx].start_time_abs = sabs;
599 	cur_iotrace_ring[nextidx].duration = duration;
600 
601 	*nextidxp = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);
602 
603 	rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
604 	    MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
605 }
606 
607 static inline uint32_t
traptrace_start(int vecnum,uint64_t ipc,uint64_t sabs,uint64_t frameptr)608 traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
609 {
610 	cpu_data_t *cdata;
611 	uint32_t nextidx;
612 	traptrace_entry_t *__unsafe_indexable cur_traptrace_ring;
613 	uint32_t *nextidxp;
614 
615 	if (__improbable(traptrace_enabled == 0 || traptrace_entries_per_cpu == 0)) {
616 		return TRAPTRACE_INVALID_INDEX;
617 	}
618 
619 	assert(ml_get_interrupts_enabled() == FALSE);
620 	cdata = current_cpu_datap();
621 	nextidxp = PERCPU_GET(traptrace_next);
622 	nextidx = *nextidxp;
623 	/* prevent nested interrupts from clobbering this record */
624 	*nextidxp = (((nextidx + 1) >= (unsigned int)traptrace_entries_per_cpu) ? 0 : (nextidx + 1));
625 
626 	cur_traptrace_ring = *PERCPU_GET(traptrace_ring);
627 	cur_traptrace_ring[nextidx].vector = vecnum;
628 	cur_traptrace_ring[nextidx].curthread = current_thread_fast();
629 	cur_traptrace_ring[nextidx].interrupted_pc = ipc;
630 	cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
631 	cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
632 	cur_traptrace_ring[nextidx].start_time_abs = sabs;
633 	cur_traptrace_ring[nextidx].duration = ~0ULL;
634 
635 	rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
636 	    MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);
637 
638 	assert(nextidx <= 0xFFFF);
639 
640 	/*
641 	 * encode the cpu number we're on because traptrace_end()
642 	 * might be called from a different CPU.
643 	 */
644 	return ((uint32_t)cdata->cpu_number << 16) | nextidx;
645 }
646 
647 static inline void
traptrace_end(uint32_t index,uint64_t eabs)648 traptrace_end(uint32_t index, uint64_t eabs)
649 {
650 	traptrace_entry_t *__unsafe_indexable ring;
651 
652 	if (index != TRAPTRACE_INVALID_INDEX) {
653 		ring = *PERCPU_GET_WITH_BASE(other_percpu_base(index >> 16),
654 		    traptrace_ring);
655 		index &= 0XFFFF;
656 		ring[index].duration = eabs - ring[index].start_time_abs;
657 	}
658 }
659 
660 #endif /* DEVELOPMENT || DEBUG */
661 
662 __header_always_inline void
pltrace(boolean_t plenable)663 pltrace(boolean_t plenable)
664 {
665 #if DEVELOPMENT || DEBUG
666 	if (__improbable(plctrace_enabled != 0)) {
667 		pltrace_internal(plenable);
668 	}
669 #else
670 	(void)plenable;
671 #endif
672 }
673 
674 static inline void
disable_preemption_internal(void)675 disable_preemption_internal(void)
676 {
677 	assert(get_preemption_level() >= 0);
678 
679 	os_compiler_barrier();
680 	CPU_DATA()->cpu_preemption_level++;
681 	os_compiler_barrier();
682 	pltrace(FALSE);
683 }
684 
685 static inline void
enable_preemption_internal(void)686 enable_preemption_internal(void)
687 {
688 	assert(get_preemption_level() > 0);
689 	pltrace(TRUE);
690 	os_compiler_barrier();
691 	if (0 == --CPU_DATA()->cpu_preemption_level) {
692 		kernel_preempt_check();
693 	}
694 	os_compiler_barrier();
695 }
696 
697 static inline void
enable_preemption_no_check(void)698 enable_preemption_no_check(void)
699 {
700 	assert(get_preemption_level() > 0);
701 
702 	pltrace(TRUE);
703 	os_compiler_barrier();
704 	CPU_DATA()->cpu_preemption_level--;
705 	os_compiler_barrier();
706 }
707 
/* Underscore-prefixed alias of enable_preemption_no_check(). */
static inline void
_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
713 
/* mp_-prefixed alias of disable_preemption_internal(). */
static inline void
mp_disable_preemption(void)
{
	disable_preemption_internal();
}
719 
/* _mp_-prefixed alias of disable_preemption_internal(). */
static inline void
_mp_disable_preemption(void)
{
	disable_preemption_internal();
}
725 
/* mp_-prefixed alias of enable_preemption_internal(). */
static inline void
mp_enable_preemption(void)
{
	enable_preemption_internal();
}
731 
/* _mp_-prefixed alias of enable_preemption_internal(). */
static inline void
_mp_enable_preemption(void)
{
	enable_preemption_internal();
}
737 
/* mp_-prefixed alias of enable_preemption_no_check(). */
static inline void
mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
743 
/* _mp_-prefixed alias of enable_preemption_no_check(). */
static inline void
_mp_enable_preemption_no_check(void)
{
	enable_preemption_no_check();
}
749 
750 #ifdef XNU_KERNEL_PRIVATE
/* With XNU_KERNEL_PRIVATE defined, the generic preemption names map straight onto the inlines in this header. */
751 #define disable_preemption() disable_preemption_internal()
752 #define disable_preemption_without_measurements() disable_preemption_internal()
753 #define enable_preemption() enable_preemption_internal()
754 #define MACHINE_PREEMPTION_MACROS (1)
755 #endif
756 
757 static inline cpu_data_t *
cpu_datap(int cpu)758 cpu_datap(int cpu)
759 {
760 	return cpu_data_ptr[cpu];
761 }
762 
763 static inline int
cpu_is_running(int cpu)764 cpu_is_running(int cpu)
765 {
766 	return (cpu_datap(cpu) != NULL) && (cpu_datap(cpu)->cpu_running);
767 }
768 
769 #ifdef MACH_KERNEL_PRIVATE
770 static inline cpu_data_t *
cpu_shadowp(int cpu)771 cpu_shadowp(int cpu)
772 {
773 	return cpu_data_ptr[cpu]->cd_shadow;
774 }
775 
776 #endif
/* Defined outside this header. NOTE(review): ownership of the returned cpu_data_t is not visible here -- confirm in the definition. */
777 extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu);
778 extern void cpu_data_realloc(void);
779 
780 #endif  /* I386_CPU_DATA */
781