xref: /xnu-10002.61.3/osfmk/kperf/callstack.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2011-2022 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /* Collect kernel callstacks */
30 
31 #include <mach/mach_types.h>
32 #include <kern/thread.h>
33 #include <kern/backtrace.h>
34 #include <kern/cambria_layout.h>
35 #include <vm/vm_map.h>
36 #include <kperf/buffer.h>
37 #include <kperf/context.h>
38 #include <kperf/callstack.h>
39 #include <kperf/ast.h>
40 #include <sys/errno.h>
41 
42 #if defined(__arm64__)
43 #include <arm/cpu_data.h>
44 #include <arm/cpu_data_internal.h>
45 #endif
46 
/*
 * Append one "fixup" entry to the sampled user callstack in `cs`.
 *
 * On x86, the fixup is the word currently at the user stack pointer (a
 * candidate return address that a leaf function has not pushed into a
 * frame yet); on arm64 it is the saved user LR.  If the thread's user
 * register state is unavailable or the user read fails, the entry is 0.
 *
 * Precondition: cs->kpuc_nframes leaves at least one free slot
 * (asserted below); the entry is written at kpuc_frames[kpuc_nframes]
 * and kpuc_nframes is incremented.
 */
static void
callstack_fixup_user(struct kp_ucallstack *cs, thread_t thread)
{
	uint64_t fixup_val = 0;
	assert(cs->kpuc_nframes < MAX_UCALLSTACK_FRAMES);

#if defined(__x86_64__)
	user_addr_t sp_user;
	bool user_64;
	x86_saved_state_t *state;

	state = get_user_regs(thread);
	if (!state) {
		goto out;
	}

	user_64 = is_saved_state64(state);
	if (user_64) {
		sp_user = saved_state64(state)->isf.rsp;
	} else {
		sp_user = saved_state32(state)->uesp;
	}

	/*
	 * Read the word at the user SP: copyin() only works against the
	 * current address space, so other threads go through their task map.
	 * Errors are deliberately ignored -- fixup_val stays 0.
	 */
	if (thread == current_thread()) {
		(void)copyin(sp_user, (char *)&fixup_val,
		    user_64 ? sizeof(uint64_t) : sizeof(uint32_t));
	} else {
		(void)vm_map_read_user(get_task_map(get_threadtask(thread)), sp_user,
		    &fixup_val, user_64 ? sizeof(uint64_t) : sizeof(uint32_t));
	}

#elif defined(__arm64__)

	struct arm_saved_state *state = get_user_regs(thread);
	if (!state) {
		goto out;
	}

	/* encode thumb mode into low bit of PC */
	if (is_saved_state32(state) && (get_saved_state_cpsr(state) & PSR_TF)) {
		cs->kpuc_frames[0] |= 1ULL;
	}


	fixup_val = get_saved_state_lr(state);

#else
#error "callstack_fixup_user: unsupported architecture"
#endif

out:
	cs->kpuc_frames[cs->kpuc_nframes++] = fixup_val;
}
100 
101 #if defined(__x86_64__)
102 
103 __attribute__((used))
104 static kern_return_t
interrupted_kernel_sp_value(uintptr_t * sp_val)105 interrupted_kernel_sp_value(uintptr_t *sp_val)
106 {
107 	x86_saved_state_t *state;
108 	uintptr_t sp;
109 	bool state_64;
110 	uint64_t cs;
111 	uintptr_t top, bottom;
112 
113 	state = current_cpu_datap()->cpu_int_state;
114 	if (!state) {
115 		return KERN_FAILURE;
116 	}
117 
118 	state_64 = is_saved_state64(state);
119 
120 	if (state_64) {
121 		cs = saved_state64(state)->isf.cs;
122 	} else {
123 		cs = saved_state32(state)->cs;
124 	}
125 	/* return early if interrupted a thread in user space */
126 	if ((cs & SEL_PL) == SEL_PL_U) {
127 		return KERN_FAILURE;
128 	}
129 
130 	if (state_64) {
131 		sp = saved_state64(state)->isf.rsp;
132 	} else {
133 		sp = saved_state32(state)->uesp;
134 	}
135 
136 	/* make sure the stack pointer is pointing somewhere in this stack */
137 	bottom = current_thread()->kernel_stack;
138 	top = bottom + kernel_stack_size;
139 	if (sp >= bottom && sp < top) {
140 		return KERN_FAILURE;
141 	}
142 
143 	*sp_val = *(uintptr_t *)sp;
144 	return KERN_SUCCESS;
145 }
146 
147 #elif defined(__arm64__)
148 
149 __attribute__((used))
150 static kern_return_t
interrupted_kernel_lr(uintptr_t * lr)151 interrupted_kernel_lr(uintptr_t *lr)
152 {
153 	struct arm_saved_state *state;
154 
155 	state = getCpuDatap()->cpu_int_state;
156 
157 	/* return early if interrupted a thread in user space */
158 	if (PSR64_IS_USER(get_saved_state_cpsr(state))) {
159 		return KERN_FAILURE;
160 	}
161 
162 	*lr = get_saved_state_lr(state);
163 	return KERN_SUCCESS;
164 }
165 #else /* defined(__arm64__) */
166 #error "interrupted_kernel_{sp,lr}: unsupported architecture"
167 #endif /* !defined(__arm64__) */
168 
169 
/*
 * Append the interrupt fixup entry to a kernel callstack sampled from
 * interrupt context: the interrupted frame's SP word (x86) or LR (arm64).
 * On release kernels, or if the value cannot be recovered, the entry is 0.
 */
static void
callstack_fixup_interrupted(struct kp_kcallstack *cs)
{
	uintptr_t fixup_val = 0;
	assert(cs->kpkc_nframes < MAX_KCALLSTACK_FRAMES);

	/*
	 * Only provide arbitrary data on development or debug kernels.
	 */
#if DEVELOPMENT || DEBUG
#if defined(__x86_64__)
	(void)interrupted_kernel_sp_value(&fixup_val);
#elif defined(__arm64__)
	(void)interrupted_kernel_lr(&fixup_val);
#endif /* defined(__x86_64__) */
#endif /* DEVELOPMENT || DEBUG */

	assert(cs->kpkc_flags & CALLSTACK_KERNEL);
	cs->kpkc_frames[cs->kpkc_nframes++] = fixup_val;
}
190 
191 void
kperf_continuation_sample(struct kp_kcallstack * cs,struct kperf_context * context)192 kperf_continuation_sample(struct kp_kcallstack *cs, struct kperf_context *context)
193 {
194 	thread_t thread;
195 
196 	assert(cs != NULL);
197 	assert(context != NULL);
198 
199 	thread = context->cur_thread;
200 	assert(thread != NULL);
201 	assert(thread->continuation != NULL);
202 
203 	cs->kpkc_flags = CALLSTACK_CONTINUATION | CALLSTACK_VALID | CALLSTACK_KERNEL;
204 #ifdef __LP64__
205 	cs->kpkc_flags |= CALLSTACK_64BIT;
206 #endif
207 
208 	cs->kpkc_nframes = 1;
209 	cs->kpkc_frames[0] = VM_KERNEL_UNSLIDE(thread->continuation);
210 }
211 
212 void
kperf_backtrace_sample(struct kp_kcallstack * cs,struct kperf_context * context)213 kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context)
214 {
215 	assert(cs != NULL);
216 	assert(context != NULL);
217 	assert(context->cur_thread == current_thread());
218 
219 	cs->kpkc_flags = CALLSTACK_KERNEL | CALLSTACK_KERNEL_WORDS;
220 #ifdef __LP64__
221 	cs->kpkc_flags |= CALLSTACK_64BIT;
222 #endif
223 
224 	BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_START, 1);
225 
226 	backtrace_info_t btinfo = BTI_NONE;
227 	struct backtrace_control ctl = {
228 		.btc_frame_addr = (uintptr_t)context->starting_fp,
229 	};
230 	cs->kpkc_nframes = backtrace(cs->kpkc_word_frames, cs->kpkc_nframes - 1,
231 	    &ctl, &btinfo);
232 	if (cs->kpkc_nframes > 0) {
233 		cs->kpkc_flags |= CALLSTACK_VALID;
234 		/*
235 		 * Fake the value pointed to by the stack pointer or the link
236 		 * register for symbolicators.
237 		 */
238 		cs->kpkc_word_frames[cs->kpkc_nframes + 1] = 0;
239 		cs->kpkc_nframes += 1;
240 	}
241 	if ((btinfo & BTI_TRUNCATED)) {
242 		cs->kpkc_flags |= CALLSTACK_TRUNCATED;
243 	}
244 
245 	BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, cs->kpkc_nframes);
246 }
247 
248 kern_return_t chudxnu_thread_get_callstack64_kperf(thread_t thread,
249     uint64_t *callStack, mach_msg_type_number_t *count,
250     boolean_t user_only);
251 
252 void
kperf_kcallstack_sample(struct kp_kcallstack * cs,struct kperf_context * context)253 kperf_kcallstack_sample(struct kp_kcallstack *cs, struct kperf_context *context)
254 {
255 	thread_t thread;
256 
257 	assert(cs != NULL);
258 	assert(context != NULL);
259 	assert(cs->kpkc_nframes <= MAX_KCALLSTACK_FRAMES);
260 
261 	thread = context->cur_thread;
262 	assert(thread != NULL);
263 
264 	BUF_INFO(PERF_CS_KSAMPLE | DBG_FUNC_START, (uintptr_t)thread_tid(thread),
265 	    cs->kpkc_nframes);
266 
267 	cs->kpkc_flags = CALLSTACK_KERNEL;
268 #ifdef __LP64__
269 	cs->kpkc_flags |= CALLSTACK_64BIT;
270 #endif
271 
272 	if (ml_at_interrupt_context()) {
273 		assert(thread == current_thread());
274 		cs->kpkc_flags |= CALLSTACK_KERNEL_WORDS;
275 		backtrace_info_t btinfo = BTI_NONE;
276 		struct backtrace_control ctl = { .btc_flags = BTF_KERN_INTERRUPTED, };
277 		cs->kpkc_nframes = backtrace(cs->kpkc_word_frames, cs->kpkc_nframes - 1,
278 		    &ctl, &btinfo);
279 		if (cs->kpkc_nframes != 0) {
280 			callstack_fixup_interrupted(cs);
281 		}
282 		if ((btinfo & BTI_TRUNCATED)) {
283 			cs->kpkc_flags |= CALLSTACK_TRUNCATED;
284 		}
285 	} else {
286 		/*
287 		 * Rely on legacy CHUD backtracer to backtrace kernel stacks on
288 		 * other threads.
289 		 */
290 		kern_return_t kr;
291 		kr = chudxnu_thread_get_callstack64_kperf(thread,
292 		    cs->kpkc_frames, &cs->kpkc_nframes, FALSE);
293 		if (kr == KERN_SUCCESS) {
294 			cs->kpkc_flags |= CALLSTACK_VALID;
295 		} else if (kr == KERN_RESOURCE_SHORTAGE) {
296 			cs->kpkc_flags |= CALLSTACK_VALID;
297 			cs->kpkc_flags |= CALLSTACK_TRUNCATED;
298 		} else {
299 			cs->kpkc_nframes = 0;
300 		}
301 	}
302 
303 	if (!(cs->kpkc_flags & CALLSTACK_VALID)) {
304 		BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK);
305 	}
306 
307 	BUF_INFO(PERF_CS_KSAMPLE | DBG_FUNC_END, (uintptr_t)thread_tid(thread),
308 	    cs->kpkc_flags, cs->kpkc_nframes);
309 }
310 
/*
 * Sample the user callstack of the thread in `context` into `cs`.
 *
 * On entry, cs->kpuc_nframes is the capacity of the frame buffer; on
 * return it is the number of frames captured (including one fixup entry).
 * If the backtracer reports an async frame address (presumably a Swift
 * concurrency continuation -- confirm against kern/backtrace.h), a second
 * backtrace of that chain is appended and CALLSTACK_HAS_ASYNC is set.
 */
void
kperf_ucallstack_sample(struct kp_ucallstack *cs, struct kperf_context *context)
{
	/* Reading user memory may fault, so interrupts must be enabled. */
	assert(ml_get_interrupts_enabled() == TRUE);

	thread_t thread = context->cur_thread;
	assert(thread != NULL);

	BUF_INFO(PERF_CS_USAMPLE | DBG_FUNC_START,
	    (uintptr_t)thread_tid(thread), cs->kpuc_nframes);

	struct backtrace_user_info btinfo = BTUINFO_INIT;
	/*
	 * Leave space for the fixup information.
	 */
	unsigned int maxnframes = cs->kpuc_nframes - 1;
	struct backtrace_control ctl = { .btc_user_thread = thread, };
	unsigned int nframes = backtrace_user(cs->kpuc_frames, maxnframes, &ctl,
	    &btinfo);
	cs->kpuc_nframes = MIN(maxnframes, nframes);

	/* Frames were captured as kernel-sized words (uintptr_t). */
	cs->kpuc_flags |= CALLSTACK_KERNEL_WORDS |
	    ((btinfo.btui_info & BTI_TRUNCATED) ? CALLSTACK_TRUNCATED : 0) |
	    ((btinfo.btui_info & BTI_64_BIT) ? CALLSTACK_64BIT : 0);

	/*
	 * Ignore EFAULT to get as much of the stack as possible.
	 */
	if (btinfo.btui_error == 0 || btinfo.btui_error == EFAULT) {
		/* Consumes the slot reserved above. */
		callstack_fixup_user(cs, thread);
		cs->kpuc_flags |= CALLSTACK_VALID;

		if (cs->kpuc_nframes < maxnframes &&
		    btinfo.btui_async_frame_addr != 0) {
			/* Re-run the backtracer starting at the async frame. */
			cs->kpuc_async_index = btinfo.btui_async_start_index;
			ctl.btc_frame_addr = btinfo.btui_async_frame_addr;
			ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
			maxnframes -= cs->kpuc_nframes;
			btinfo = BTUINFO_INIT;
			unsigned int nasync_frames = backtrace_user(
			    &cs->kpuc_frames[cs->kpuc_nframes], maxnframes, &ctl, &btinfo);
			if (btinfo.btui_info & BTI_TRUNCATED) {
				cs->kpuc_flags |= CALLSTACK_TRUNCATED;
			}
			/* EFAULT is tolerated here too; partial async stacks are kept. */
			if (btinfo.btui_error == 0 || btinfo.btui_error == EFAULT) {
				cs->kpuc_flags |= CALLSTACK_HAS_ASYNC;
				cs->kpuc_async_nframes = nasync_frames;
			}
		}
	} else {
		cs->kpuc_nframes = 0;
		BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK, btinfo.btui_error);
	}

	BUF_INFO(PERF_CS_USAMPLE | DBG_FUNC_END, (uintptr_t)thread_tid(thread),
	    cs->kpuc_flags, cs->kpuc_nframes);
}
368 
369 static inline uintptr_t
scrub_word(uintptr_t * bt,int n_frames,int frame,bool kern)370 scrub_word(uintptr_t *bt, int n_frames, int frame, bool kern)
371 {
372 	if (frame < n_frames) {
373 		if (kern) {
374 			return VM_KERNEL_UNSLIDE(bt[frame]);
375 		} else {
376 			return bt[frame];
377 		}
378 	} else {
379 		return 0;
380 	}
381 }
382 
/*
 * Return the 64-bit frame at `frame` narrowed to a word, or 0 when the
 * index is past the end of the captured stack (zero padding for the
 * fixed four-slot trace events).
 */
static inline uintptr_t
scrub_frame(uint64_t *bt, int n_frames, int frame)
{
	return (frame < n_frames) ? (uintptr_t)(bt[frame]) : 0;
}
392 
393 static void
callstack_log(uint32_t hdrid,uint32_t dataid,void * vframes,unsigned int nframes,unsigned int flags,unsigned int async_index,unsigned int async_nframes)394 callstack_log(uint32_t hdrid, uint32_t dataid, void *vframes,
395     unsigned int nframes, unsigned int flags, unsigned int async_index,
396     unsigned int async_nframes)
397 {
398 	BUF_VERB(PERF_CS_LOG | DBG_FUNC_START, flags, nframes);
399 	BUF_DATA(hdrid, flags, nframes - async_nframes, async_index, async_nframes);
400 
401 	unsigned int nevts = nframes / 4;
402 	unsigned int ovf = nframes % 4;
403 	if (ovf != 0) {
404 		nevts++;
405 	}
406 
407 	bool kern = flags & CALLSTACK_KERNEL;
408 
409 	if (flags & CALLSTACK_KERNEL_WORDS) {
410 		uintptr_t *frames = vframes;
411 		for (unsigned int i = 0; i < nevts; i++) {
412 			unsigned int j = i * 4;
413 			BUF_DATA(dataid,
414 			    scrub_word(frames, nframes, j + 0, kern),
415 			    scrub_word(frames, nframes, j + 1, kern),
416 			    scrub_word(frames, nframes, j + 2, kern),
417 			    scrub_word(frames, nframes, j + 3, kern));
418 		}
419 	} else {
420 		for (unsigned int i = 0; i < nevts; i++) {
421 			uint64_t *frames = vframes;
422 			unsigned int j = i * 4;
423 			BUF_DATA(dataid,
424 			    scrub_frame(frames, nframes, j + 0),
425 			    scrub_frame(frames, nframes, j + 1),
426 			    scrub_frame(frames, nframes, j + 2),
427 			    scrub_frame(frames, nframes, j + 3));
428 		}
429 	}
430 
431 	BUF_VERB(PERF_CS_LOG | DBG_FUNC_END, flags, nframes);
432 }
433 
/*
 * Emit the sampled kernel callstack in `cs` to the trace buffer.  Kernel
 * stacks have no async portion, so the async index/count are 0.
 *
 * NOTE(review): callstack_log() reads the buffer as uintptr_t words when
 * CALLSTACK_KERNEL_WORDS is set, yet this passes kpkc_frames rather than
 * kpkc_word_frames -- presumably the two alias (a union in
 * kperf/callstack.h); confirm.
 */
void
kperf_kcallstack_log(struct kp_kcallstack *cs)
{
	callstack_log(PERF_CS_KHDR, PERF_CS_KDATA, cs->kpkc_frames,
	    cs->kpkc_nframes, cs->kpkc_flags, 0, 0);
}
440 
/*
 * Emit the sampled user callstack in `cs`, including any async frames
 * appended after the main stack, to the trace buffer.
 */
void
kperf_ucallstack_log(struct kp_ucallstack *cs)
{
	callstack_log(PERF_CS_UHDR, PERF_CS_UDATA, cs->kpuc_frames,
	    cs->kpuc_nframes + cs->kpuc_async_nframes, cs->kpuc_flags,
	    cs->kpuc_async_index, cs->kpuc_async_nframes);
}
448 
449 int
kperf_ucallstack_pend(struct kperf_context * context,uint32_t depth,unsigned int actionid)450 kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth,
451     unsigned int actionid)
452 {
453 	if (depth < 2) {
454 		panic("HUH");
455 	}
456 	kperf_ast_set_callstack_depth(context->cur_thread, depth);
457 	return kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK,
458 	           actionid);
459 }
460 
461 static kern_return_t
chudxnu_kern_read(void * dstaddr,vm_offset_t srcaddr,vm_size_t size)462 chudxnu_kern_read(void *dstaddr, vm_offset_t srcaddr, vm_size_t size)
463 {
464 	return (ml_nofault_copy(srcaddr, (vm_offset_t)dstaddr, size) == size) ?
465 	       KERN_SUCCESS : KERN_FAILURE;
466 }
467 
468 static kern_return_t
chudxnu_task_read(task_t task,void * kernaddr,uint64_t usraddr,vm_size_t size)469 chudxnu_task_read(
470 	task_t      task,
471 	void        *kernaddr,
472 	uint64_t    usraddr,
473 	vm_size_t   size)
474 {
475 	//ppc version ported to arm
476 	kern_return_t ret = KERN_SUCCESS;
477 
478 	if (ml_at_interrupt_context()) {
479 		return KERN_FAILURE;    // can't look at tasks on interrupt stack
480 	}
481 
482 	if (current_task() == task) {
483 		if (copyin(usraddr, kernaddr, size)) {
484 			ret = KERN_FAILURE;
485 		}
486 	} else {
487 		vm_map_t map = get_task_map(task);
488 		ret = vm_map_read_user(map, usraddr, kernaddr, size);
489 	}
490 
491 	return ret;
492 }
493 
494 static inline uint64_t
chudxnu_vm_unslide(uint64_t ptr,int kaddr)495 chudxnu_vm_unslide( uint64_t ptr, int kaddr )
496 {
497 	if (!kaddr) {
498 		return ptr;
499 	}
500 
501 	return VM_KERNEL_UNSLIDE(ptr);
502 }
503 
504 #if __arm64__
505 
506 #if defined(HAS_APPLE_PAC)
507 #include <ptrauth.h>
508 #endif
509 
510 // chudxnu_thread_get_callstack gathers a raw callstack along with any information needed to
511 // fix it up later (in case we stopped program as it was saving values into prev stack frame, etc.)
512 // after sampling has finished.
513 //
514 // For an N-entry callstack:
515 //
516 // [0]      current pc
517 // [1..N-3] stack frames (including current one)
518 // [N-2]    current LR (return value if we're in a leaf function)
519 // [N-1]    current r0 (in case we've saved LR in r0) (optional)
520 //
521 //
522 #define CS_FLAG_EXTRASP  1  // capture extra sp register
523 
/*
 * Walk the saved frame-pointer chain for `thread` and fill `callStack`
 * with unslid return addresses.
 *
 * thread    - thread to backtrace
 * callStack - output buffer of *count entries
 * count     - in: buffer capacity; out: number of entries written
 * user_only - backtrace the user saved state instead of the kernel one
 * flags     - CS_FLAG_EXTRASP also appends SP after the trailing LR
 *
 * Layout on return: [0] PC, [1..] return addresses from the frame walk,
 * then LR and (with CS_FLAG_EXTRASP) SP -- see the comment above
 * CS_FLAG_EXTRASP.  Returns KERN_FAILURE when no saved state is
 * available or a kernel stack was requested for a user frame, and
 * KERN_RESOURCE_SHORTAGE when the buffer was too small or filled up.
 *
 * The two big branches are the same walk specialized for 64-bit
 * (arm_saved_state64, PAC-stripped pointers) and 32-bit
 * (arm_saved_state32, thumb-bit encoding) saved states.
 */
static kern_return_t
chudxnu_thread_get_callstack64_internal(
	thread_t                thread,
	uint64_t                *callStack,
	mach_msg_type_number_t  *count,
	boolean_t               user_only,
	int flags)
{
	kern_return_t   kr = KERN_SUCCESS;
	task_t                  task;
	uint64_t                currPC = 0ULL, currLR = 0ULL, currSP = 0ULL;
	/* NOTE(review): prevPC is assigned during the walk but never read. */
	uint64_t                prevPC = 0ULL;
	uint64_t                kernStackMin = thread->kernel_stack;
	uint64_t                kernStackMax = kernStackMin + kernel_stack_size;
	uint64_t       *buffer = callStack;
	int             bufferIndex = 0;
	int             bufferMaxIndex = 0;
	boolean_t       kernel = FALSE;
	struct arm_saved_state *sstate = NULL;
	uint64_t                pc = 0ULL;

	task = get_threadtask(thread);
	bufferMaxIndex = *count;
	//get thread state
	if (user_only) {
		sstate = find_user_regs(thread);
	} else {
		sstate = find_kern_regs(thread);
	}

	if (!sstate) {
		*count = 0;
		return KERN_FAILURE;
	}

	if (is_saved_state64(sstate)) {
		struct arm_saved_state64 *state = NULL;
		uint64_t *fp = NULL, *nextFramePointer = NULL, *topfp = NULL;
		uint64_t frame[2];

		state = saved_state64(sstate);

		/* make sure it is safe to dereference before you do it */
		kernel = PSR64_IS_KERNEL(state->cpsr);

		/* can't take a kernel callstack if we've got a user frame */
		if (!user_only && !kernel) {
			return KERN_FAILURE;
		}

		/*
		 * Reserve space for saving LR (and sometimes SP) at the end of the
		 * backtrace.
		 */
		if (flags & CS_FLAG_EXTRASP) {
			bufferMaxIndex -= 2;
		} else {
			bufferMaxIndex -= 1;
		}

		if (bufferMaxIndex < 2) {
			*count = 0;
			return KERN_RESOURCE_SHORTAGE;
		}

		currPC = state->pc;
		currLR = state->lr;
		currSP = state->sp;

		fp = (uint64_t *)state->fp; /* frame pointer */
#if defined(HAS_APPLE_PAC)
		/* frame pointers on stack will be signed by arm64e ABI */
		fp = ptrauth_strip(fp, ptrauth_key_frame_pointer);
#endif
		topfp = fp;

		bufferIndex = 0;  // start with a stack of size zero
		buffer[bufferIndex++] = chudxnu_vm_unslide(currPC, kernel); // save PC in position 0.

		BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_START, kernel, 0);

		// Now, fill buffer with stack backtraces.
		while (bufferIndex < bufferMaxIndex) {
			pc = 0ULL;
			/*
			 * Below the frame pointer, the following values are saved:
			 * -> FP
			 */

			/*
			 * Note that we read the pc even for the first stack frame
			 * (which, in theory, is always empty because the callee fills
			 * it in just before it lowers the stack.  However, if we
			 * catch the program in between filling in the return address
			 * and lowering the stack, we want to still have a valid
			 * backtrace. FixupStack correctly disregards this value if
			 * necessary.
			 */

			/* Frame pointers must be non-NULL and 4-byte aligned. */
			if ((uint64_t)fp == 0 || ((uint64_t)fp & 0x3) != 0) {
				/* frame pointer is invalid - stop backtracing */
				pc = 0ULL;
				break;
			}

			if (kernel) {
				/* Kernel frames must lie within this thread's kernel stack. */
				if (((uint64_t)fp > kernStackMax) ||
				    ((uint64_t)fp < kernStackMin)) {
					kr = KERN_FAILURE;
				} else {
					kr = chudxnu_kern_read(&frame,
					    (vm_offset_t)fp,
					    (vm_size_t)sizeof(frame));
					if (kr == KERN_SUCCESS) {
#if defined(HAS_APPLE_PAC)
						/* return addresses on stack will be signed by arm64e ABI */
						pc = (uint64_t)ptrauth_strip((void *)frame[1], ptrauth_key_return_address);
#else
						pc = frame[1];
#endif
						nextFramePointer = (uint64_t *)frame[0];
#if defined(HAS_APPLE_PAC)
						/* frame pointers on stack will be signed by arm64e ABI */
						nextFramePointer = ptrauth_strip(nextFramePointer, ptrauth_key_frame_pointer);
#endif
					} else {
						pc = 0ULL;
						nextFramePointer = 0ULL;
						kr = KERN_FAILURE;
					}
				}
			} else {
				/* User frames are read through the task's VM map. */
				kr = chudxnu_task_read(task,
				    &frame,
				    (vm_offset_t)fp,
				    (vm_size_t)sizeof(frame));
				if (kr == KERN_SUCCESS) {
#if defined(HAS_APPLE_PAC)
					/* return addresses on stack will be signed by arm64e ABI */
					pc = (uint64_t)ptrauth_strip((void *)frame[1], ptrauth_key_return_address);
#else
					pc = frame[1];
#endif
					nextFramePointer = (uint64_t *)(frame[0]);
#if defined(HAS_APPLE_PAC)
					/* frame pointers on stack will be signed by arm64e ABI */
					nextFramePointer = ptrauth_strip(nextFramePointer, ptrauth_key_frame_pointer);
#endif
				} else {
					pc = 0ULL;
					nextFramePointer = 0ULL;
					kr = KERN_FAILURE;
				}
			}

			if (kr != KERN_SUCCESS) {
				pc = 0ULL;
				break;
			}

			if (nextFramePointer) {
				buffer[bufferIndex++] = chudxnu_vm_unslide(pc, kernel);
				prevPC = pc;
			}

			/* Stacks grow down, so a next FP below the current one means
			 * the chain is corrupt or finished -- stop walking. */
			if (nextFramePointer < fp) {
				break;
			} else {
				fp = nextFramePointer;
			}
		}

		BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, bufferIndex);

		if (bufferIndex >= bufferMaxIndex) {
			bufferIndex = bufferMaxIndex;
			kr = KERN_RESOURCE_SHORTAGE;
		} else {
			kr = KERN_SUCCESS;
		}

		// Save link register and SP at bottom of stack (used for later fixup).
		buffer[bufferIndex++] = chudxnu_vm_unslide(currLR, kernel);
		if (flags & CS_FLAG_EXTRASP) {
			buffer[bufferIndex++] = chudxnu_vm_unslide(currSP, kernel);
		}
	} else {
		struct arm_saved_state32 *state = NULL;
		uint32_t *fp = NULL, *nextFramePointer = NULL, *topfp = NULL;

		/* 64-bit kernel stacks, 32-bit user stacks */
		uint64_t frame[2];
		uint32_t frame32[2];

		state = saved_state32(sstate);

		/* make sure it is safe to dereference before you do it */
		kernel = PSR_IS_KERNEL(state->cpsr);

		/* can't take a kernel callstack if we've got a user frame */
		if (!user_only && !kernel) {
			return KERN_FAILURE;
		}

		/*
		 * Reserve space for saving LR (and sometimes SP) at the end of the
		 * backtrace.
		 */
		if (flags & CS_FLAG_EXTRASP) {
			bufferMaxIndex -= 2;
		} else {
			bufferMaxIndex -= 1;
		}

		if (bufferMaxIndex < 2) {
			*count = 0;
			return KERN_RESOURCE_SHORTAGE;
		}

		currPC = (uint64_t)state->pc; /* r15 */
		if (state->cpsr & PSR_TF) {
			currPC |= 1ULL; /* encode thumb mode into low bit of PC */
		}
		currLR = (uint64_t)state->lr; /* r14 */
		currSP = (uint64_t)state->sp; /* r13 */

		fp = (uint32_t *)(uintptr_t)state->r[7]; /* frame pointer */
		topfp = fp;

		bufferIndex = 0;  // start with a stack of size zero
		buffer[bufferIndex++] = chudxnu_vm_unslide(currPC, kernel); // save PC in position 0.

		BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_START, kernel, 1);

		// Now, fill buffer with stack backtraces.
		while (bufferIndex < bufferMaxIndex) {
			pc = 0ULL;
			/*
			 * Below the frame pointer, the following values are saved:
			 * -> FP
			 */

			/*
			 * Note that we read the pc even for the first stack frame
			 * (which, in theory, is always empty because the callee fills
			 * it in just before it lowers the stack.  However, if we
			 * catch the program in between filling in the return address
			 * and lowering the stack, we want to still have a valid
			 * backtrace. FixupStack correctly disregards this value if
			 * necessary.
			 */

			if ((uint32_t)fp == 0 || ((uint32_t)fp & 0x3) != 0) {
				/* frame pointer is invalid - stop backtracing */
				pc = 0ULL;
				break;
			}

			if (kernel) {
				if (((uint32_t)fp > kernStackMax) ||
				    ((uint32_t)fp < kernStackMin)) {
					kr = KERN_FAILURE;
				} else {
					/* Kernel frames are two 64-bit words. */
					kr = chudxnu_kern_read(&frame,
					    (vm_offset_t)fp,
					    (vm_size_t)sizeof(frame));
					if (kr == KERN_SUCCESS) {
						pc = (uint64_t)frame[1];
						nextFramePointer = (uint32_t *) (frame[0]);
					} else {
						pc = 0ULL;
						nextFramePointer = 0ULL;
						kr = KERN_FAILURE;
					}
				}
			} else {
				/* User frames are two 32-bit words; mask FP to 32 bits. */
				kr = chudxnu_task_read(task,
				    &frame32,
				    (((uint64_t)(uint32_t)fp) & 0x00000000FFFFFFFFULL),
				    sizeof(frame32));
				if (kr == KERN_SUCCESS) {
					pc = (uint64_t)frame32[1];
					nextFramePointer = (uint32_t *)(uintptr_t)(frame32[0]);
				} else {
					pc = 0ULL;
					nextFramePointer = 0ULL;
					kr = KERN_FAILURE;
				}
			}

			if (kr != KERN_SUCCESS) {
				pc = 0ULL;
				break;
			}

			if (nextFramePointer) {
				buffer[bufferIndex++] = chudxnu_vm_unslide(pc, kernel);
				prevPC = pc;
			}

			if (nextFramePointer < fp) {
				break;
			} else {
				fp = nextFramePointer;
			}
		}

		BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, bufferIndex);

		/* clamp callstack size to max */
		if (bufferIndex >= bufferMaxIndex) {
			bufferIndex = bufferMaxIndex;
			kr = KERN_RESOURCE_SHORTAGE;
		} else {
			/* ignore all other failures */
			kr = KERN_SUCCESS;
		}

		// Save link register and R13 (sp) at bottom of stack (used for later fixup).
		buffer[bufferIndex++] = chudxnu_vm_unslide(currLR, kernel);
		if (flags & CS_FLAG_EXTRASP) {
			buffer[bufferIndex++] = chudxnu_vm_unslide(currSP, kernel);
		}
	}

	*count = bufferIndex;
	return kr;
}
852 
853 kern_return_t
chudxnu_thread_get_callstack64_kperf(thread_t thread,uint64_t * callStack,mach_msg_type_number_t * count,boolean_t user_only)854 chudxnu_thread_get_callstack64_kperf(
855 	thread_t                thread,
856 	uint64_t                *callStack,
857 	mach_msg_type_number_t  *count,
858 	boolean_t               user_only)
859 {
860 	return chudxnu_thread_get_callstack64_internal( thread, callStack, count, user_only, 0 );
861 }
862 #elif __x86_64__
863 
864 #define VALID_STACK_ADDRESS(supervisor, addr, minKernAddr, maxKernAddr)   (supervisor ? (addr>=minKernAddr && addr<=maxKernAddr) : TRUE)
865 // don't try to read in the hole
866 #define VALID_STACK_ADDRESS64(supervisor, addr, minKernAddr, maxKernAddr) \
867 (supervisor ? ((uint64_t)addr >= minKernAddr && (uint64_t)addr <= maxKernAddr) : \
868 ((uint64_t)addr != 0ULL && ((uint64_t)addr <= 0x00007FFFFFFFFFFFULL || (uint64_t)addr >= 0xFFFF800000000000ULL)))
869 
870 typedef struct _cframe64_t {
871 	uint64_t        prevFP;         // can't use a real pointer here until we're a 64 bit kernel
872 	uint64_t        caller;
873 	uint64_t        args[0];
874 }cframe64_t;
875 
876 
877 typedef struct _cframe_t {
878 	uint32_t                prev;   // this is really a user32-space pointer to the previous frame
879 	uint32_t                caller;
880 	uint32_t                args[0];
881 } cframe_t;
882 
883 extern void * find_user_regs(thread_t);
884 extern x86_saved_state32_t *find_kern_regs(thread_t);
885 
/*
 * Walk the frame-pointer chain of a blocked thread's saved kernel state
 * (`regs`) and append unslid return addresses to `frames`.
 *
 * thread    - thread whose kernel stack bounds are used for validation
 * regs      - x86_kernel_state holding the saved RIP/RBP (or EIP/EBP)
 * frames    - output buffer
 * start_idx - in: first index to write; out: one past the last written
 * max_idx   - capacity of `frames`
 *
 * Returns KERN_RESOURCE_SHORTAGE when the buffer fills, KERN_FAILURE
 * when the starting state can't be read, KERN_SUCCESS otherwise.
 */
static kern_return_t
do_kernel_backtrace(
	thread_t thread,
	struct x86_kernel_state *regs,
	uint64_t *frames,
	mach_msg_type_number_t *start_idx,
	mach_msg_type_number_t max_idx)
{
	uint64_t kernStackMin = (uint64_t)thread->kernel_stack;
	uint64_t kernStackMax = (uint64_t)kernStackMin + kernel_stack_size;
	mach_msg_type_number_t ct = *start_idx;
	kern_return_t kr = KERN_FAILURE;

#if __LP64__
	uint64_t currPC = 0ULL;
	uint64_t currFP = 0ULL;
	/* NOTE(review): prevPC is assigned during the walk but never read. */
	uint64_t prevPC = 0ULL;
	uint64_t prevFP = 0ULL;
	/* Even reads of the saved register area go through the nofault copier. */
	if (KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_rip), sizeof(uint64_t))) {
		return KERN_FAILURE;
	}
	if (KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_rbp), sizeof(uint64_t))) {
		return KERN_FAILURE;
	}
#else
	uint32_t currPC = 0U;
	uint32_t currFP = 0U;
	uint32_t prevPC = 0U;
	uint32_t prevFP = 0U;
	if (KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_eip), sizeof(uint32_t))) {
		return KERN_FAILURE;
	}
	if (KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_ebp), sizeof(uint32_t))) {
		return KERN_FAILURE;
	}
#endif

	if (*start_idx >= max_idx) {
		return KERN_RESOURCE_SHORTAGE;  // no frames traced
	}
	if (!currPC) {
		return KERN_FAILURE;
	}

	/* Saved PC is always a kernel address -- unslide it. */
	frames[ct++] = chudxnu_vm_unslide((uint64_t)currPC, 1);

	// build a backtrace of this kernel state
#if __LP64__
	while (VALID_STACK_ADDRESS64(TRUE, currFP, kernStackMin, kernStackMax)) {
		// this is the address where caller lives in the user thread
		uint64_t caller = currFP + sizeof(uint64_t);
#else
	while (VALID_STACK_ADDRESS(TRUE, currFP, kernStackMin, kernStackMax)) {
		uint32_t caller = (uint32_t)currFP + sizeof(uint32_t);
#endif

		if (!currFP || !currPC) {
			currPC = 0;
			break;
		}

		if (ct >= max_idx) {
			*start_idx = ct;
			return KERN_RESOURCE_SHORTAGE;
		}

		/* read our caller */
		kr = chudxnu_kern_read(&currPC, (vm_offset_t)caller, sizeof(currPC));

		if (kr != KERN_SUCCESS || !currPC) {
			currPC = 0UL;
			break;
		}

		/*
		 * retrieve contents of the frame pointer and advance to the next
		 * stack frame if it's valid
		 */
		prevFP = 0;
		kr = chudxnu_kern_read(&prevFP, (vm_offset_t)currFP, sizeof(currPC));

#if __LP64__
		if (VALID_STACK_ADDRESS64(TRUE, prevFP, kernStackMin, kernStackMax)) {
#else
		if (VALID_STACK_ADDRESS(TRUE, prevFP, kernStackMin, kernStackMax)) {
#endif
			frames[ct++] = chudxnu_vm_unslide((uint64_t)currPC, 1);
			prevPC = currPC;
		}
		/* Stacks grow down: a next FP at or below the current one means
		 * the chain is corrupt or finished. */
		if (prevFP <= currFP) {
			break;
		} else {
			currFP = prevFP;
		}
	}

	*start_idx = ct;
	return KERN_SUCCESS;
}
985 
986 
987 
988 static kern_return_t
989 do_backtrace32(
990 	task_t task,
991 	thread_t thread,
992 	x86_saved_state32_t *regs,
993 	uint64_t *frames,
994 	mach_msg_type_number_t *start_idx,
995 	mach_msg_type_number_t max_idx,
996 	boolean_t supervisor)
997 {
998 	uint32_t tmpWord = 0UL;
999 	uint64_t currPC = (uint64_t) regs->eip;
1000 	uint64_t currFP = (uint64_t) regs->ebp;
1001 	uint64_t prevPC = 0ULL;
1002 	uint64_t prevFP = 0ULL;
1003 	uint64_t kernStackMin = thread->kernel_stack;
1004 	uint64_t kernStackMax = kernStackMin + kernel_stack_size;
1005 	mach_msg_type_number_t ct = *start_idx;
1006 	kern_return_t kr = KERN_FAILURE;
1007 
1008 	if (ct >= max_idx) {
1009 		return KERN_RESOURCE_SHORTAGE;  // no frames traced
1010 	}
1011 	frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
1012 
1013 	// build a backtrace of this 32 bit state.
1014 	while (VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) {
1015 		cframe_t *fp = (cframe_t *) (uintptr_t) currFP;
1016 
1017 		if (!currFP) {
1018 			currPC = 0;
1019 			break;
1020 		}
1021 
1022 		if (ct >= max_idx) {
1023 			*start_idx = ct;
1024 			return KERN_RESOURCE_SHORTAGE;
1025 		}
1026 
1027 		/* read our caller */
1028 		if (supervisor) {
1029 			kr = chudxnu_kern_read(&tmpWord, (vm_offset_t) &fp->caller, sizeof(uint32_t));
1030 		} else {
1031 			kr = chudxnu_task_read(task, &tmpWord, (vm_offset_t) &fp->caller, sizeof(uint32_t));
1032 		}
1033 
1034 		if (kr != KERN_SUCCESS) {
1035 			currPC = 0ULL;
1036 			break;
1037 		}
1038 
1039 		currPC = (uint64_t) tmpWord;    // promote 32 bit address
1040 
1041 		/*
1042 		 * retrive contents of the frame pointer and advance to the next stack
1043 		 * frame if it's valid
1044 		 */
1045 		prevFP = 0;
1046 		if (supervisor) {
1047 			kr = chudxnu_kern_read(&tmpWord, (vm_offset_t)&fp->prev, sizeof(uint32_t));
1048 		} else {
1049 			kr = chudxnu_task_read(task, &tmpWord, (vm_offset_t)&fp->prev, sizeof(uint32_t));
1050 		}
1051 		prevFP = (uint64_t) tmpWord;    // promote 32 bit address
1052 
1053 		if (prevFP) {
1054 			frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
1055 			prevPC = currPC;
1056 		}
1057 		if (prevFP < currFP) {
1058 			break;
1059 		} else {
1060 			currFP = prevFP;
1061 		}
1062 	}
1063 
1064 	*start_idx = ct;
1065 	return KERN_SUCCESS;
1066 }
1067 
/*
 * Walk the 64-bit frame-pointer chain rooted at the saved register state
 * `regs`, appending unslid return addresses to frames[] starting at
 * *start_idx.  All memory is accessed through the nofault chudxnu_*_read
 * helpers, so an unmapped or torn stack cannot fault the sampler.
 *
 * task/thread:  the sampled task and thread (thread supplies the kernel
 *               stack bounds used to validate supervisor frame pointers).
 * frames:       output buffer of unslid addresses.
 * start_idx:    in: first slot to fill; out: one past the last slot filled.
 * max_idx:      capacity of frames[].
 * supervisor:   TRUE to validate/read against kernel addresses, FALSE to
 *               read through the task's user address space.
 *
 * Returns KERN_RESOURCE_SHORTAGE if frames[] filled up before the walk
 * terminated, KERN_SUCCESS otherwise (a broken chain is not an error;
 * the walk simply stops).
 */
static kern_return_t
do_backtrace64(
	task_t task,
	thread_t thread,
	x86_saved_state64_t *regs,
	uint64_t *frames,
	mach_msg_type_number_t *start_idx,
	mach_msg_type_number_t max_idx,
	boolean_t supervisor)
{
	uint64_t currPC = regs->isf.rip;
	uint64_t currFP = regs->rbp;
	uint64_t prevPC = 0ULL;
	uint64_t prevFP = 0ULL;
	uint64_t kernStackMin = (uint64_t)thread->kernel_stack;
	uint64_t kernStackMax = (uint64_t)kernStackMin + kernel_stack_size;
	mach_msg_type_number_t ct = *start_idx;
	kern_return_t kr = KERN_FAILURE;

	if (*start_idx >= max_idx) {
		return KERN_RESOURCE_SHORTAGE;  // no frames traced
	}
	/* the interrupted/saved PC is always the first entry */
	frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);

	// build a backtrace of this 64 bit state.
	while (VALID_STACK_ADDRESS64(supervisor, currFP, kernStackMin, kernStackMax)) {
		// this is the address where caller lives in the user thread
		uint64_t caller = currFP + sizeof(uint64_t);

		if (!currFP) {
			currPC = 0;
			break;
		}

		if (ct >= max_idx) {
			*start_idx = ct;
			return KERN_RESOURCE_SHORTAGE;
		}

		/* read our caller */
		if (supervisor) {
			kr = chudxnu_kern_read(&currPC, (vm_offset_t)caller, sizeof(uint64_t));
		} else {
			kr = chudxnu_task_read(task, &currPC, caller, sizeof(uint64_t));
		}

		if (kr != KERN_SUCCESS) {
			currPC = 0ULL;
			break;
		}

		/*
		 * retrieve contents of the frame pointer and advance to the next stack
		 * frame if it's valid (prevFP is pre-zeroed, so a failed read below
		 * simply terminates the walk at the prevFP < currFP check)
		 */
		prevFP = 0;
		if (supervisor) {
			kr = chudxnu_kern_read(&prevFP, (vm_offset_t)currFP, sizeof(uint64_t));
		} else {
			kr = chudxnu_task_read(task, &prevFP, currFP, sizeof(uint64_t));
		}

		/* only record the caller if the next frame pointer looks plausible */
		if (VALID_STACK_ADDRESS64(supervisor, prevFP, kernStackMin, kernStackMax)) {
			frames[ct++] = chudxnu_vm_unslide(currPC, supervisor);
			prevPC = currPC;
		}
		/* stop if the chain heads back down the stack */
		if (prevFP < currFP) {
			break;
		} else {
			currFP = prevFP;
		}
	}

	*start_idx = ct;
	return KERN_SUCCESS;
}
1144 
/*
 * Gather a callstack for `thread` into callstack[], which has room for
 * *count 64-bit entries; on return *count holds the number of entries
 * actually written.
 *
 * The routine first decides which saved register state to start from:
 *   - at interrupt context, only the interrupted current_thread()'s state
 *     (cpu_int_state) is reachable, and user-only requests fail outright;
 *   - a kernel-task thread not at interrupt context is walked from the
 *     x86_kernel_state saved at the top of its kernel stack;
 *   - otherwise the thread's USER_STATE is used (which may still be
 *     supervisor-mode, per the cs selector's privilege level).
 *
 * user_only: fail unless user-mode register state is available.
 * kern_only: never fall back to backtracing user-mode state.
 *
 * After the frame walk, one extra entry — the word on top of the sampled
 * stack, read nofault through the saved stack pointer — is appended when
 * space remains.
 */
static kern_return_t
chudxnu_thread_get_callstack64_internal(
	thread_t                thread,
	uint64_t                *callstack,
	mach_msg_type_number_t  *count,
	boolean_t               user_only,
	boolean_t               kern_only)
{
	kern_return_t kr = KERN_FAILURE;
	task_t task = get_threadtask(thread);
	uint64_t currPC = 0ULL;
	boolean_t supervisor = FALSE;
	mach_msg_type_number_t bufferIndex = 0;
	mach_msg_type_number_t bufferMaxIndex = *count;
	x86_saved_state_t *tagged_regs = NULL;          // kernel register state
	x86_saved_state64_t *regs64 = NULL;
	x86_saved_state32_t *regs32 = NULL;
	x86_saved_state32_t *u_regs32 = NULL;
	x86_saved_state64_t *u_regs64 = NULL;
	struct x86_kernel_state *kregs = NULL;

	if (ml_at_interrupt_context()) {
		if (user_only) {
			/* can't backtrace user state on interrupt stack. */
			return KERN_FAILURE;
		}

		/* backtracing at interrupt context? */
		if (thread == current_thread() && current_cpu_datap()->cpu_int_state) {
			/*
			 * Locate the registers for the interrupted thread, assuming it is
			 * current_thread().
			 */
			tagged_regs = current_cpu_datap()->cpu_int_state;

			if (is_saved_state64(tagged_regs)) {
				/* 64 bit registers */
				regs64 = saved_state64(tagged_regs);
				supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U);
			} else {
				/* 32 bit registers */
				regs32 = saved_state32(tagged_regs);
				supervisor = ((regs32->cs & SEL_PL) != SEL_PL_U);
			}
		}
	}

	if (!ml_at_interrupt_context() && kernel_task == task) {
		if (!thread->kernel_stack) {
			return KERN_FAILURE;
		}

		// Kernel thread not at interrupt context
		kregs = (struct x86_kernel_state *)NULL;

		// nofault read of the thread->kernel_stack pointer
		if (KERN_SUCCESS != chudxnu_kern_read(&kregs, (vm_offset_t)&(thread->kernel_stack), sizeof(void *))) {
			return KERN_FAILURE;
		}

		// Adjust to find the saved kernel state
		kregs = STACK_IKS((vm_offset_t)(uintptr_t)kregs);

		supervisor = TRUE;
	} else if (!tagged_regs) {
		/*
		 * not at interrupt context, or tracing a different thread than
		 * current_thread() at interrupt context
		 */
		tagged_regs = USER_STATE(thread);
		if (is_saved_state64(tagged_regs)) {
			/* 64 bit registers */
			regs64 = saved_state64(tagged_regs);
			supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U);
		} else {
			/* 32 bit registers */
			regs32 = saved_state32(tagged_regs);
			supervisor = ((regs32->cs & SEL_PL) != SEL_PL_U);
		}
	}

	*count = 0;

	if (supervisor) {
		// the caller only wants a user callstack.
		if (user_only) {
			// bail - we've only got kernel state
			return KERN_FAILURE;
		}
	} else {
		// regs32(64) is not in supervisor mode.
		u_regs32 = regs32;
		u_regs64 = regs64;
		regs32 = NULL;
		regs64 = NULL;
	}

	if (user_only) {
		/* we only want to backtrace the user mode */
		if (!(u_regs32 || u_regs64)) {
			/* no user state to look at */
			return KERN_FAILURE;
		}
	}

	/*
	 * Order of preference for top of stack:
	 * 64 bit kernel state (not likely)
	 * 32 bit kernel state
	 * 64 bit user land state
	 * 32 bit user land state
	 */

	if (kregs) {
		/*
		 * nofault read of the registers from the kernel stack (as they can
		 * disappear on the fly).
		 */

		if (KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(kregs->k_rip), sizeof(uint64_t))) {
			return KERN_FAILURE;
		}
	} else if (regs64) {
		currPC = regs64->isf.rip;
	} else if (regs32) {
		currPC = (uint64_t) regs32->eip;
	} else if (u_regs64) {
		currPC = u_regs64->isf.rip;
	} else if (u_regs32) {
		currPC = (uint64_t) u_regs32->eip;
	}

	if (!currPC) {
		/* no top of the stack, bail out */
		return KERN_FAILURE;
	}

	bufferIndex = 0;

	if (bufferMaxIndex < 1) {
		*count = 0;
		return KERN_RESOURCE_SHORTAGE;
	}

	/* backtrace kernel */
	if (kregs) {
		addr64_t address = 0ULL;
		size_t size = 0UL;

		// do the backtrace
		kr = do_kernel_backtrace(thread, kregs, callstack, &bufferIndex, bufferMaxIndex);

		// and do a nofault read of (r|e)sp
		uint64_t rsp = 0ULL;
		size = sizeof(uint64_t);

		// two-step read: first the saved k_rsp, then the word it points at
		if (KERN_SUCCESS != chudxnu_kern_read(&address, (vm_offset_t)&(kregs->k_rsp), size)) {
			address = 0ULL;
		}

		if (address && KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t)address, size) && bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t)rsp;
		}
	} else if (regs64) {
		uint64_t rsp = 0ULL;

		// backtrace the 64bit side.
		kr = do_backtrace64(task, thread, regs64, callstack, &bufferIndex,
		    bufferMaxIndex - 1, TRUE);

		// append the top-of-stack word when it's readable and space remains
		if (KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t) regs64->isf.rsp, sizeof(uint64_t)) &&
		    bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = rsp;
		}
	} else if (regs32) {
		uint32_t esp = 0UL;

		// backtrace the 32bit side.
		kr = do_backtrace32(task, thread, regs32, callstack, &bufferIndex,
		    bufferMaxIndex - 1, TRUE);

		// append the top-of-stack word when it's readable and space remains
		if (KERN_SUCCESS == chudxnu_kern_read(&esp, (vm_offset_t) regs32->uesp, sizeof(uint32_t)) &&
		    bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t) esp;
		}
	} else if (u_regs64 && !kern_only) {
		/* backtrace user land */
		uint64_t rsp = 0ULL;

		kr = do_backtrace64(task, thread, u_regs64, callstack, &bufferIndex,
		    bufferMaxIndex - 1, FALSE);

		// append the top-of-stack word when it's readable and space remains
		if (KERN_SUCCESS == chudxnu_task_read(task, &rsp, (addr64_t) u_regs64->isf.rsp, sizeof(uint64_t)) &&
		    bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = rsp;
		}
	} else if (u_regs32 && !kern_only) {
		uint32_t esp = 0UL;

		kr = do_backtrace32(task, thread, u_regs32, callstack, &bufferIndex,
		    bufferMaxIndex - 1, FALSE);

		// append the top-of-stack word when it's readable and space remains
		if (KERN_SUCCESS == chudxnu_task_read(task, &esp, (addr64_t) u_regs32->uesp, sizeof(uint32_t)) &&
		    bufferIndex < bufferMaxIndex) {
			callstack[bufferIndex++] = (uint64_t) esp;
		}
	}

	*count = bufferIndex;
	return kr;
}
1356 
1357 __private_extern__
1358 kern_return_t
1359 chudxnu_thread_get_callstack64_kperf(
1360 	thread_t                thread,
1361 	uint64_t                *callstack,
1362 	mach_msg_type_number_t  *count,
1363 	boolean_t               is_user)
1364 {
1365 	return chudxnu_thread_get_callstack64_internal(thread, callstack, count, is_user, !is_user);
1366 }
1367 #else /* !__arm64__ && !__x86_64__ */
1368 #error kperf: unsupported architecture
1369 #endif /* !__arm64__ && !__x86_64__ */
1370