xref: /xnu-8019.80.24/osfmk/kern/kern_stackshot.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/vm_param.h>
31 #include <mach/mach_vm.h>
32 #include <mach/clock_types.h>
33 #include <sys/errno.h>
34 #include <sys/stackshot.h>
35 #ifdef IMPORTANCE_INHERITANCE
36 #include <ipc/ipc_importance.h>
37 #endif
38 #include <sys/appleapiopts.h>
39 #include <kern/debug.h>
40 #include <kern/block_hint.h>
41 #include <uuid/uuid.h>
42 
43 #include <kdp/kdp_dyld.h>
44 #include <kdp/kdp_en_debugger.h>
45 
46 #include <libsa/types.h>
47 #include <libkern/version.h>
48 #include <libkern/section_keywords.h>
49 
50 #include <string.h> /* bcopy */
51 
52 #include <kern/backtrace.h>
53 #include <kern/coalition.h>
54 #include <kern/processor.h>
55 #include <kern/host_statistics.h>
56 #include <kern/counter.h>
57 #include <kern/thread.h>
58 #include <kern/thread_group.h>
59 #include <kern/task.h>
60 #include <kern/telemetry.h>
61 #include <kern/clock.h>
62 #include <kern/policy_internal.h>
63 #include <kern/socd_client.h>
64 #include <vm/vm_map.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_pageout.h>
67 #include <vm/vm_fault.h>
68 #include <vm/vm_shared_region.h>
69 #include <vm/vm_compressor.h>
70 #include <libkern/OSKextLibPrivate.h>
71 #include <os/log.h>
72 
73 #if defined(__x86_64__)
74 #include <i386/mp.h>
75 #include <i386/cpu_threads.h>
76 #endif
77 
78 #include <pexpert/pexpert.h>
79 
80 #if MONOTONIC
81 #include <kern/monotonic.h>
82 #endif /* MONOTONIC */
83 
84 #include <san/kasan.h>
85 
86 #if DEBUG || DEVELOPMENT
87 # define STACKSHOT_COLLECTS_LATENCY_INFO 1
88 #else
89 # define STACKSHOT_COLLECTS_LATENCY_INFO 0
90 #endif /* DEBUG || DEVELOPMENT */
91 
92 extern unsigned int not_in_kdp;
93 
94 /* indicate to the compiler that some accesses are unaligned */
95 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
96 
97 extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
98 
99 int kdp_snapshot                            = 0;
100 static kern_return_t stack_snapshot_ret     = 0;
101 static uint32_t stack_snapshot_bytes_traced = 0;
102 static uint32_t stack_snapshot_bytes_uncompressed  = 0;
103 
104 #if STACKSHOT_COLLECTS_LATENCY_INFO
105 static bool collect_latency_info = true;
106 #endif
107 static kcdata_descriptor_t stackshot_kcdata_p = NULL;
108 static void *stack_snapshot_buf;
109 static uint32_t stack_snapshot_bufsize;
110 int stack_snapshot_pid;
111 static uint64_t stack_snapshot_flags;
112 static uint64_t stack_snapshot_delta_since_timestamp;
113 static uint32_t stack_snapshot_pagetable_mask;
114 static boolean_t panic_stackshot;
115 
116 static boolean_t stack_enable_faulting = FALSE;
117 static struct stackshot_fault_stats fault_stats;
118 
119 /*
120  * Experimentally, our current estimates are 20% short 96% of the time; 40 gets
121  * us into 99.9%+ territory.  In the longer run, we need to make stackshot
122  * estimates use a better approach (rdar://78880038); this is intended to be a
123  * short-term fix.
124  */
125 uint32_t stackshot_estimate_adj = 40; /* experiment factor: 0-100, adjust our estimate up by this amount */
126 
127 static uint32_t stackshot_initial_estimate;
128 static uint32_t stackshot_initial_estimate_adj;
129 static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
130 static unaligned_u64 * stackshot_duration_outer;
131 static uint64_t stackshot_microsecs;
132 
133 void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
134 int kernel_stackshot_buf_size = 0;
135 
136 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
137 
138 __private_extern__ void stackshot_init( void );
139 static boolean_t memory_iszero(void *addr, size_t size);
140 uint32_t                get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj);
141 kern_return_t           kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
142     size_t stackshot_config_size, boolean_t stackshot_from_user);
143 kern_return_t           do_stackshot(void *);
144 void                    kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags, kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask);
145 boolean_t               stackshot_thread_is_idle_worker_unsafe(thread_t thread);
146 static int              kdp_stackshot_kcdata_format(int pid, uint64_t trace_flags, uint32_t *pBytesTraced, uint32_t *pBytesUncompressed);
147 uint32_t                kdp_stack_snapshot_bytes_traced(void);
148 uint32_t                kdp_stack_snapshot_bytes_uncompressed(void);
149 static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
150 static boolean_t        kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
151 static int              kdp_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
152 static boolean_t        kdp_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
153 static uint64_t         proc_was_throttled_from_task(task_t task);
154 static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_t * waitinfo);
155 static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
156 static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_t *tsinfo);
157 static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);
158 
159 #if CONFIG_COALITIONS
160 static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
161 static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
162 #endif /* CONFIG_COALITIONS */
163 
164 #if CONFIG_THREAD_GROUPS
165 static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
166 static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
167 #endif /* CONFIG_THREAD_GROUPS */
168 
169 extern uint32_t         workqueue_get_pwq_state_kdp(void *proc);
170 
171 struct proc;
172 extern int              proc_pid(struct proc *p);
173 extern uint64_t         proc_uniqueid(void *p);
174 extern uint64_t         proc_was_throttled(void *p);
175 extern uint64_t         proc_did_throttle(void *p);
176 extern int              proc_exiting(void *p);
177 extern int              proc_in_teardown(void *p);
178 static uint64_t         proc_did_throttle_from_task(task_t task);
179 extern void             proc_name_kdp(struct proc *p, char * buf, int size);
180 extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
181 extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
182 extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
183 extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
184 extern int              memorystatus_get_pressure_status_kdp(void);
185 extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
186 
187 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
188 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
189 
190 #if CONFIG_TELEMETRY
191 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
192 #endif /* CONFIG_TELEMETRY */
193 
194 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
195 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
196 
197 static size_t _stackshot_get_page_size(vm_map_t, size_t *page_mask_out);
198 
199 /*
200  * Validates that the given address for a word is both a valid page and has
201  * default caching attributes for the current map.
202  */
203 bool machine_trace_thread_validate_kva(vm_offset_t);
204 /*
205  * Validates a region that stackshot will potentially inspect.
206  */
207 static bool _stackshot_validate_kva(vm_offset_t, size_t);
208 /*
209  * Must be called whenever stackshot is re-driven.
210  */
211 static void _stackshot_validation_reset(void);
212 
213 #define KDP_FAULT_RESULT_PAGED_OUT   0x1 /* some data was unable to be retrieved */
214 #define KDP_FAULT_RESULT_TRIED_FAULT 0x2 /* tried to fault in data */
215 #define KDP_FAULT_RESULT_FAULTED_IN  0x4 /* successfully faulted in data */
216 
217 /*
218  * Looks up the physical translation for the given address in the target map, attempting
219  * to fault data in if requested and it is not resident. Populates thread_trace_flags if requested
220  * as well.
221  */
222 vm_offset_t kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results);
223 
224 static size_t _stackshot_strlcpy(char *dst, const char *src, size_t maxlen);
225 void stackshot_memcpy(void *dst, const void *src, size_t len);
226 
227 #define MAX_FRAMES 1000
228 #define MAX_LOADINFOS 500
229 #define TASK_IMP_WALK_LIMIT 20
230 
231 typedef struct thread_snapshot *thread_snapshot_t;
232 typedef struct task_snapshot *task_snapshot_t;
233 
234 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
235 extern kdp_send_t    kdp_en_send_pkt;
236 #endif
237 
238 /*
239  * Stackshot locking and other defines.
240  */
241 static LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
242 static LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
243 
244 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
245 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
246 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
247 
248 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
249 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
250 
251 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
252 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
253 
254 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
255 
256 /*
257  * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
258  * for non-panic stackshots where faulting is requested.
259  */
260 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
261 
262 #define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
263 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
264 
265 #ifndef ROUNDUP
266 #define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
267 #endif
268 
269 #define STACKSHOT_QUEUE_LABEL_MAXSIZE  64
270 
271 /*
272  * Initialize the mutex governing access to the stack snapshot subsystem
273  * and other stackshot related bits.
274  */
275 __private_extern__ void
stackshot_init(void)276 stackshot_init( void )
277 {
278 	mach_timebase_info_data_t timebase;
279 
280 	clock_timebase_info(&timebase);
281 	fault_stats.sfs_system_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
282 
283 	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
284 
285 	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
286 }
287 
288 /*
289  * Method for grabbing timer values safely, in the sense that no infinite loop will occur
290  * Certain flavors of the timer_grab function, which would seem to be the thing to use,
291  * can loop infinitely if called while the timer is in the process of being updated.
292  * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of
293  * the timer using this method. This seems insoluble, since stackshot runs in a context
294  * where the timer might be half-updated, and has no way of yielding control just long
295  * enough to finish the update.
296  */
297 
298 static uint64_t
safe_grab_timer_value(struct timer * t)299 safe_grab_timer_value(struct timer *t)
300 {
301 #if   defined(__LP64__)
302 	return t->all_bits;
303 #else
304 	uint64_t time = t->high_bits; /* endian independent grab */
305 	time = (time << 32) | t->low_bits;
306 	return time;
307 #endif
308 }
309 
310 /*
311  * Called with interrupts disabled after stackshot context has been
312  * initialized. Updates stack_snapshot_ret.
313  */
314 static kern_return_t
stackshot_trap()315 stackshot_trap()
316 {
317 	kern_return_t   rv;
318 
319 #if defined(__x86_64__)
320 	/*
321 	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
322 	 * operation, it's essential to apply mutual exclusion to the other when one
323 	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
324 	 * capture CPUs.
325 	 *
326 	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
327 	 * allowed, so we check to ensure there there is no rendezvous in progress before
328 	 * trying to grab the lock (if there is, a deadlock will occur when we try to
329 	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
330 	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
331 	 * a subordinate of the call chain within the mp rendezvous action, this flag will
332 	 * be set and can be used to detect the inevitable deadlock that would occur
333 	 * if this thread tried to grab the rendezvous lock.
334 	 */
335 
336 	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
337 		panic("Calling stackshot from a rendezvous is not allowed!");
338 	}
339 
340 	mp_rendezvous_lock();
341 #endif
342 
343 	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0);
344 
345 #if defined(__x86_64__)
346 	mp_rendezvous_unlock();
347 #endif
348 	return rv;
349 }
350 
351 
352 kern_return_t
stack_snapshot_from_kernel(int pid,void * buf,uint32_t size,uint64_t flags,uint64_t delta_since_timestamp,uint32_t pagetable_mask,unsigned * bytes_traced)353 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
354 {
355 	kern_return_t error = KERN_SUCCESS;
356 	boolean_t istate;
357 
358 #if DEVELOPMENT || DEBUG
359 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
360 		error = KERN_NOT_SUPPORTED;
361 		goto out;
362 	}
363 #endif
364 	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
365 		return KERN_INVALID_ARGUMENT;
366 	}
367 
368 	/* cap in individual stackshot to max_tracebuf_size */
369 	if (size > max_tracebuf_size) {
370 		size = max_tracebuf_size;
371 	}
372 
373 	/* Serialize tracing */
374 	if (flags & STACKSHOT_TRYLOCK) {
375 		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
376 			return KERN_LOCK_OWNED;
377 		}
378 	} else {
379 		STACKSHOT_SUBSYS_LOCK();
380 	}
381 
382 	struct kcdata_descriptor kcdata;
383 	uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ?
384 	    KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;
385 
386 	error = kcdata_memory_static_init(&kcdata, (mach_vm_address_t)buf, hdr_tag, size,
387 	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
388 	if (error) {
389 		goto out;
390 	}
391 
392 	stackshot_initial_estimate = 0;
393 	stackshot_duration_prior_abs = 0;
394 	stackshot_duration_outer = NULL;
395 
396 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
397 	    flags, size, pid, delta_since_timestamp);
398 
399 	istate = ml_set_interrupts_enabled(FALSE);
400 	uint64_t time_start      = mach_absolute_time();
401 
402 	/* Emit a SOCD tracepoint that we are initiating a stackshot */
403 	SOCD_TRACE_XNU_START(STACKSHOT);
404 
405 	/* Preload trace parameters*/
406 	kdp_snapshot_preflight(pid, buf, size, flags, &kcdata,
407 	    delta_since_timestamp, pagetable_mask);
408 
409 	/*
410 	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
411 	 * the trace buffer
412 	 */
413 	error = stackshot_trap();
414 
415 	uint64_t time_end               = mach_absolute_time();
416 
417 	/* Emit a SOCD tracepoint that we have completed the stackshot */
418 	SOCD_TRACE_XNU_END(STACKSHOT);
419 
420 	ml_set_interrupts_enabled(istate);
421 
422 	if (stackshot_duration_outer) {
423 		*stackshot_duration_outer = time_end - time_start;
424 	}
425 	*bytes_traced = kdp_stack_snapshot_bytes_traced();
426 
427 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
428 	    error, (time_end - time_start), size, *bytes_traced);
429 out:
430 	stackshot_kcdata_p = NULL;
431 	STACKSHOT_SUBSYS_UNLOCK();
432 	return error;
433 }
434 
435 #if CONFIG_TELEMETRY
436 kern_return_t
stack_microstackshot(user_addr_t tracebuf,uint32_t tracebuf_size,uint32_t flags,int32_t * retval)437 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
438 {
439 	int error = KERN_SUCCESS;
440 	uint32_t bytes_traced = 0;
441 
442 	*retval = -1;
443 
444 	/*
445 	 * Control related operations
446 	 */
447 	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
448 		telemetry_global_ctl(1);
449 		*retval = 0;
450 		goto exit;
451 	} else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
452 		telemetry_global_ctl(0);
453 		*retval = 0;
454 		goto exit;
455 	}
456 
457 	/*
458 	 * Data related operations
459 	 */
460 	*retval = -1;
461 
462 	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
463 		error = KERN_INVALID_ARGUMENT;
464 		goto exit;
465 	}
466 
467 	STACKSHOT_SUBSYS_LOCK();
468 
469 	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
470 		if (tracebuf_size > max_tracebuf_size) {
471 			error = KERN_INVALID_ARGUMENT;
472 			goto unlock_exit;
473 		}
474 
475 		bytes_traced = tracebuf_size;
476 		error = telemetry_gather(tracebuf, &bytes_traced,
477 		    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
478 		*retval = (int)bytes_traced;
479 		goto unlock_exit;
480 	}
481 
482 unlock_exit:
483 	STACKSHOT_SUBSYS_UNLOCK();
484 exit:
485 	return error;
486 }
487 #endif /* CONFIG_TELEMETRY */
488 
489 /*
490  * Return the estimated size of a stackshot based on the
491  * number of currently running threads and tasks.
492  *
493  * adj is an adjustment in units of percentage
494  *
495  * This function is mostly unhinged from reality; struct thread_snapshot and
496  * struct task_snapshot are legacy, much larger versions of the structures we
497  * actually use, and there's no accounting for how we actually generate
498  * task & thread information.  rdar://78880038 intends to replace this all.
499  */
500 uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj)501 get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj)
502 {
503 	vm_size_t thread_total;
504 	vm_size_t task_total;
505 	uint64_t size;
506 	uint32_t estimated_size;
507 	size_t est_thread_size = sizeof(struct thread_snapshot);
508 	size_t est_task_size = sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE;
509 
510 	adj = MIN(adj, 100u);   /* no more than double our estimate */
511 
512 #if STACKSHOT_COLLECTS_LATENCY_INFO
513 	if (collect_latency_info) {
514 		est_thread_size += sizeof(struct stackshot_latency_thread);
515 		est_task_size += sizeof(struct stackshot_latency_task);
516 	}
517 #endif
518 
519 	thread_total = (threads_count * est_thread_size);
520 	task_total = (tasks_count  * est_task_size);
521 
522 	size = thread_total + task_total + STACKSHOT_SUPP_SIZE;                 /* estimate */
523 	size += (size * adj) / 100;                                                                     /* add adj */
524 	size = MAX(size, prev_size_hint);                                                               /* allow hint to increase */
525 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK));             /* avoid overflow */
526 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
527 
528 	return estimated_size;
529 }
530 
531 /*
532  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
533  *				into the current task's user space and subsequently copy out the address
534  *				at which the buffer has been mapped in user space to out_buffer_addr.
535  *
536  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
537  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
538  *				out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
539  *				out_size_addr - pointer to be filled in with the size of the buffer
540  *
541  * Outputs:			KERN_SUCCESS if the buffer was remapped and both copyouts succeeded
542  *				the error returned by mach_vm_remap_kernel (not converted) if the remap fails
543  *				an error from copyout (the new user mapping is deallocated first)
544  */
545 static kern_return_t
stackshot_remap_buffer(void * stackshotbuf,uint32_t bytes_traced,uint64_t out_buffer_addr,uint64_t out_size_addr)546 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
547 {
548 	int                     error = 0;
549 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
550 	vm_prot_t               cur_prot, max_prot;
551 
552 	error = mach_vm_remap_kernel(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
553 	    VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
554 	/*
555 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
556 	 */
557 	if (error == KERN_SUCCESS) {
558 		/*
559 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
560 		 * we just made in the task's user space.
561 		 */
562 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
563 		if (error != KERN_SUCCESS) {
564 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
565 			return error;
566 		}
567 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
568 		if (error != KERN_SUCCESS) {
569 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
570 			return error;
571 		}
572 	}
573 	return error;
574 }
575 
576 kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version,void * stackshot_config,size_t stackshot_config_size,boolean_t stackshot_from_user)577 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
578 {
579 	int error = 0;
580 	boolean_t prev_interrupt_state;
581 	uint32_t bytes_traced = 0;
582 	uint32_t stackshot_estimate = 0;
583 	uint32_t stackshotbuf_size = 0;
584 	void * stackshotbuf = NULL;
585 	kcdata_descriptor_t kcdata_p = NULL;
586 
587 	void * buf_to_free = NULL;
588 	int size_to_free = 0;
589 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
590 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
591 
592 	/* Parsed arguments */
593 	uint64_t                out_buffer_addr;
594 	uint64_t                out_size_addr;
595 	int                     pid = -1;
596 	uint64_t                flags;
597 	uint64_t                since_timestamp;
598 	uint32_t                size_hint = 0;
599 	uint32_t                pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
600 
601 	if (stackshot_config == NULL) {
602 		return KERN_INVALID_ARGUMENT;
603 	}
604 #if DEVELOPMENT || DEBUG
605 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
606 	 * configuration in lieu of the kernel feature override.
607 	 */
608 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
609 		return KERN_NOT_SUPPORTED;
610 	}
611 #endif
612 
613 	switch (stackshot_config_version) {
614 	case STACKSHOT_CONFIG_TYPE:
615 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
616 			return KERN_INVALID_ARGUMENT;
617 		}
618 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
619 		out_buffer_addr = config->sc_out_buffer_addr;
620 		out_size_addr = config->sc_out_size_addr;
621 		pid = config->sc_pid;
622 		flags = config->sc_flags;
623 		since_timestamp = config->sc_delta_timestamp;
624 		if (config->sc_size <= max_tracebuf_size) {
625 			size_hint = config->sc_size;
626 		}
627 		/*
628 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
629 		 * dump every level if the pagetable_mask is not set
630 		 */
631 		if (flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
632 			pagetable_mask = config->sc_pagetable_mask;
633 		}
634 		break;
635 	default:
636 		return KERN_NOT_SUPPORTED;
637 	}
638 
639 	/*
640 	 * Currently saving a kernel buffer and trylock are only supported from the
641 	 * internal/KEXT API.
642 	 */
643 	if (stackshot_from_user) {
644 		if (flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
645 			return KERN_NO_ACCESS;
646 		}
647 #if !DEVELOPMENT && !DEBUG
648 		if (flags & (STACKSHOT_DO_COMPRESS)) {
649 			return KERN_NO_ACCESS;
650 		}
651 #endif
652 	} else {
653 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
654 			return KERN_NOT_SUPPORTED;
655 		}
656 	}
657 
658 	if (!((flags & STACKSHOT_KCDATA_FORMAT) || (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
659 		return KERN_NOT_SUPPORTED;
660 	}
661 
662 	/* Compressed delta stackshots or page dumps are not yet supported */
663 	if (((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (flags & STACKSHOT_PAGE_TABLES))
664 	    && (flags & STACKSHOT_DO_COMPRESS)) {
665 		return KERN_NOT_SUPPORTED;
666 	}
667 
668 	/*
669 	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
670 	 */
671 	if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
672 		return KERN_INVALID_ARGUMENT;
673 	}
674 
675 	if (since_timestamp != 0 && ((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
676 		return KERN_INVALID_ARGUMENT;
677 	}
678 
679 #if MONOTONIC
680 	if (!mt_core_supported) {
681 		flags &= ~STACKSHOT_INSTRS_CYCLES;
682 	}
683 #else /* MONOTONIC */
684 	flags &= ~STACKSHOT_INSTRS_CYCLES;
685 #endif /* !MONOTONIC */
686 
687 	STACKSHOT_SUBSYS_LOCK();
688 
689 	if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
690 		/*
691 		 * Don't overwrite an existing stackshot
692 		 */
693 		if (kernel_stackshot_buf != NULL) {
694 			error = KERN_MEMORY_PRESENT;
695 			goto error_exit;
696 		}
697 	} else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
698 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
699 			error = KERN_NOT_IN_SET;
700 			goto error_exit;
701 		}
702 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
703 		    out_buffer_addr, out_size_addr);
704 		/*
705 		 * If we successfully remapped the buffer into the user's address space, we
706 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
707 		 * and then clear the kernel stackshot pointer and associated size.
708 		 */
709 		if (error == KERN_SUCCESS) {
710 			buf_to_free = kernel_stackshot_buf;
711 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
712 			kernel_stackshot_buf = NULL;
713 			kernel_stackshot_buf_size = 0;
714 		}
715 
716 		goto error_exit;
717 	}
718 
719 	if (flags & STACKSHOT_GET_BOOT_PROFILE) {
720 		void *bootprofile = NULL;
721 		uint32_t len = 0;
722 #if CONFIG_TELEMETRY
723 		bootprofile_get(&bootprofile, &len);
724 #endif
725 		if (!bootprofile || !len) {
726 			error = KERN_NOT_IN_SET;
727 			goto error_exit;
728 		}
729 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
730 		goto error_exit;
731 	}
732 
733 	stackshot_duration_prior_abs = 0;
734 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
735 	stackshotbuf_size = stackshot_estimate =
736 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj);
737 	stackshot_initial_estimate = stackshot_estimate;
738 
739 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
740 	    flags, stackshotbuf_size, pid, since_timestamp);
741 	is_traced = true;
742 
743 	for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
744 		if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG, KMA_ZERO) != KERN_SUCCESS) {
745 			error = KERN_RESOURCE_SHORTAGE;
746 			goto error_exit;
747 		}
748 
749 
750 		uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
751 		    : (flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
752 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
753 		kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, hdr_tag, stackshotbuf_size,
754 		    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
755 
756 		stackshot_duration_outer = NULL;
757 
758 		/* if compression was requested, allocate the extra zlib scratch area */
759 		if (flags & STACKSHOT_DO_COMPRESS) {
760 			hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
761 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
762 			error = kcdata_init_compress(kcdata_p, hdr_tag, stackshot_memcpy, KCDCT_ZLIB);
763 			if (error != KERN_SUCCESS) {
764 				os_log(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
765 				    (int) error);
766 				goto error_exit;
767 			}
768 		}
769 
770 		/*
771 		 * Disable interrupts and save the current interrupt state.
772 		 */
773 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
774 		uint64_t time_start      = mach_absolute_time();
775 
776 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
777 		SOCD_TRACE_XNU_START(STACKSHOT);
778 
779 		/*
780 		 * Load stackshot parameters.
781 		 */
782 		kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, since_timestamp,
783 		    pagetable_mask);
784 
785 		error = stackshot_trap();
786 
787 		/* record the duration that interrupts were disabled */
788 		uint64_t time_end = mach_absolute_time();
789 
790 		/* Emit a SOCD tracepoint that we have completed the stackshot */
791 		SOCD_TRACE_XNU_END(STACKSHOT);
792 		ml_set_interrupts_enabled(prev_interrupt_state);
793 
794 		if (stackshot_duration_outer) {
795 			*stackshot_duration_outer = time_end - time_start;
796 		}
797 		tot_interrupts_off_abs += time_end - time_start;
798 
799 		if (error != KERN_SUCCESS) {
800 			if (kcdata_p != NULL) {
801 				kcdata_memory_destroy(kcdata_p);
802 				kcdata_p = NULL;
803 				stackshot_kcdata_p = NULL;
804 			}
805 			kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
806 			stackshotbuf = NULL;
807 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
808 				/*
809 				 * If we didn't allocate a big enough buffer, deallocate and try again.
810 				 */
811 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
812 				    time_end - time_start, stackshot_estimate, stackshotbuf_size);
813 				stackshot_duration_prior_abs += (time_end - time_start);
814 				continue;
815 			} else {
816 				goto error_exit;
817 			}
818 		}
819 
820 		bytes_traced = kdp_stack_snapshot_bytes_traced();
821 		if (bytes_traced <= 0) {
822 			error = KERN_ABORTED;
823 			goto error_exit;
824 		}
825 
826 		assert(bytes_traced <= stackshotbuf_size);
827 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
828 			error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
829 			goto error_exit;
830 		}
831 
832 		/*
833 		 * Save the stackshot in the kernel buffer.
834 		 */
835 		kernel_stackshot_buf = stackshotbuf;
836 		kernel_stackshot_buf_size =  bytes_traced;
837 		/*
838 		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
839 		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
840 		 * update size_to_free for kmem_free accordingly.
841 		 */
842 		size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
843 
844 		assert(size_to_free >= 0);
845 
846 		if (size_to_free != 0) {
847 			buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
848 		}
849 
850 		stackshotbuf = NULL;
851 		stackshotbuf_size = 0;
852 		goto error_exit;
853 	}
854 
855 	if (stackshotbuf_size > max_tracebuf_size) {
856 		error = KERN_RESOURCE_SHORTAGE;
857 	}
858 
859 error_exit:
860 	if (is_traced) {
861 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
862 		    error, tot_interrupts_off_abs, stackshotbuf_size, bytes_traced);
863 	}
864 	if (kcdata_p != NULL) {
865 		kcdata_memory_destroy(kcdata_p);
866 		kcdata_p = NULL;
867 		stackshot_kcdata_p = NULL;
868 	}
869 
870 	if (stackshotbuf != NULL) {
871 		kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
872 	}
873 	if (buf_to_free != NULL) {
874 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
875 	}
876 	STACKSHOT_SUBSYS_UNLOCK();
877 	return error;
878 }
879 
880 /*
881  * Cache stack snapshot parameters in preparation for a trace.
882  */
883 void
kdp_snapshot_preflight(int pid,void * tracebuf,uint32_t tracebuf_size,uint64_t flags,kcdata_descriptor_t data_p,uint64_t since_timestamp,uint32_t pagetable_mask)884 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
885     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
886 {
887 	uint64_t microsecs = 0, secs = 0;
888 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
889 
890 	stackshot_microsecs = microsecs + (secs * USEC_PER_SEC);
891 	stack_snapshot_pid = pid;
892 	stack_snapshot_buf = tracebuf;
893 	stack_snapshot_bufsize = tracebuf_size;
894 	stack_snapshot_flags = flags;
895 	stack_snapshot_delta_since_timestamp = since_timestamp;
896 	stack_snapshot_pagetable_mask = pagetable_mask;
897 
898 	panic_stackshot = ((flags & STACKSHOT_FROM_PANIC) != 0);
899 
900 	assert(data_p != NULL);
901 	assert(stackshot_kcdata_p == NULL);
902 	stackshot_kcdata_p = data_p;
903 
904 	stack_snapshot_bytes_traced = 0;
905 	stack_snapshot_bytes_uncompressed = 0;
906 }
907 
908 void
panic_stackshot_reset_state()909 panic_stackshot_reset_state()
910 {
911 	stackshot_kcdata_p = NULL;
912 }
913 
914 boolean_t
stackshot_active()915 stackshot_active()
916 {
917 	return stackshot_kcdata_p != NULL;
918 }
919 
920 uint32_t
kdp_stack_snapshot_bytes_traced(void)921 kdp_stack_snapshot_bytes_traced(void)
922 {
923 	return stack_snapshot_bytes_traced;
924 }
925 
926 uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)927 kdp_stack_snapshot_bytes_uncompressed(void)
928 {
929 	return stack_snapshot_bytes_uncompressed;
930 }
931 
932 static boolean_t
memory_iszero(void * addr,size_t size)933 memory_iszero(void *addr, size_t size)
934 {
935 	char *data = (char *)addr;
936 	for (size_t i = 0; i < size; i++) {
937 		if (data[i] != 0) {
938 			return FALSE;
939 		}
940 	}
941 	return TRUE;
942 }
943 
944 /*
945  * Keep a simple cache of the most recent validation done at a page granularity
946  * to avoid the expensive software KVA-to-phys translation in the VM.
947  */
948 
949 struct _stackshot_validation_state {
950 	vm_offset_t last_valid_page_kva;
951 	size_t last_valid_size;
952 } g_validation_state;
953 
954 static void
_stackshot_validation_reset(void)955 _stackshot_validation_reset(void)
956 {
957 	g_validation_state.last_valid_page_kva = -1;
958 	g_validation_state.last_valid_size = 0;
959 }
960 
961 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)962 _stackshot_validate_kva(vm_offset_t addr, size_t size)
963 {
964 	vm_offset_t page_addr = atop_kernel(addr);
965 	if (g_validation_state.last_valid_page_kva == page_addr &&
966 	    g_validation_state.last_valid_size <= size) {
967 		return true;
968 	}
969 
970 	if (ml_validate_nofault(addr, size)) {
971 		g_validation_state.last_valid_page_kva = page_addr;
972 		g_validation_state.last_valid_size = size;
973 		return true;
974 	}
975 	return false;
976 }
977 
/* Buffer start plus the number of bytes kcdata_memory_get_used_bytes() reports as used. */
#define kcd_end_address(kcd) \
	((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes(kcd)))
/* Buffer start plus the descriptor's total length (kcd_length). */
#define kcd_max_address(kcd) \
	((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
/*
 * Evaluate `action` once and store the result in `error`.  When the result
 * is not KERN_SUCCESS, jump to `error_exit`; a KERN_RESOURCE_SHORTAGE
 * result is rewritten to KERN_INSUFFICIENT_BUFFER_SIZE before jumping.
 *
 * Use of the kcd_exit_on_error(action) macro requires a local
 * 'kern_return_t error' variable and 'error_exit' label.
 *
 * The expansion has no trailing semicolon: the caller's own ';' completes
 * the statement, so the macro can be used as the body of an unbraced
 * if/else.
 */
#define kcd_exit_on_error(action)                      \
	do {                                               \
	        if (KERN_SUCCESS != (error = (action))) {      \
	                if (error == KERN_RESOURCE_SHORTAGE) {     \
	                        error = KERN_INSUFFICIENT_BUFFER_SIZE; \
	                }                                          \
	                goto error_exit;                           \
	        }                                              \
	} while (0) /* end kcd_exit_on_error */
993 
994 static uint64_t
kcdata_get_task_ss_flags(task_t task)995 kcdata_get_task_ss_flags(task_t task)
996 {
997 	uint64_t ss_flags = 0;
998 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
999 
1000 	if (task_64bit_addr) {
1001 		ss_flags |= kUser64_p;
1002 	}
1003 	if (!task->active || task_is_a_corpse(task) || proc_exiting(task->bsd_info)) {
1004 		ss_flags |= kTerminatedSnapshot;
1005 	}
1006 	if (task->pidsuspended) {
1007 		ss_flags |= kPidSuspended;
1008 	}
1009 	if (task->frozen) {
1010 		ss_flags |= kFrozen;
1011 	}
1012 	if (task->effective_policy.tep_darwinbg == 1) {
1013 		ss_flags |= kTaskDarwinBG;
1014 	}
1015 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
1016 		ss_flags |= kTaskIsForeground;
1017 	}
1018 	if (task->requested_policy.trp_boosted == 1) {
1019 		ss_flags |= kTaskIsBoosted;
1020 	}
1021 	if (task->effective_policy.tep_sup_active == 1) {
1022 		ss_flags |= kTaskIsSuppressed;
1023 	}
1024 #if CONFIG_MEMORYSTATUS
1025 
1026 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
1027 	memorystatus_proc_flags_unsafe(task->bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
1028 	if (dirty) {
1029 		ss_flags |= kTaskIsDirty;
1030 	}
1031 	if (dirty_tracked) {
1032 		ss_flags |= kTaskIsDirtyTracked;
1033 	}
1034 	if (allow_idle_exit) {
1035 		ss_flags |= kTaskAllowIdleExit;
1036 	}
1037 
1038 #endif
1039 	if (task->effective_policy.tep_tal_engaged) {
1040 		ss_flags |= kTaskTALEngaged;
1041 	}
1042 
1043 	ss_flags |= (0x7 & workqueue_get_pwq_state_kdp(task->bsd_info)) << 17;
1044 
1045 #if IMPORTANCE_INHERITANCE
1046 	if (task->task_imp_base) {
1047 		if (task->task_imp_base->iit_donor) {
1048 			ss_flags |= kTaskIsImpDonor;
1049 		}
1050 		if (task->task_imp_base->iit_live_donor) {
1051 			ss_flags |= kTaskIsLiveImpDonor;
1052 		}
1053 	}
1054 #endif
1055 	return ss_flags;
1056 }
1057 
1058 static kern_return_t
kcdata_record_shared_cache_info(kcdata_descriptor_t kcd,task_t task,unaligned_u64 * task_snap_ss_flags)1059 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
1060 {
1061 	kern_return_t error = KERN_SUCCESS;
1062 
1063 	uint64_t shared_cache_slide = 0;
1064 	uint64_t shared_cache_first_mapping = 0;
1065 	uint32_t kdp_fault_results = 0;
1066 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
1067 
1068 
1069 	assert(task_snap_ss_flags != NULL);
1070 
1071 	/* Get basic info about the shared region pointer, regardless of any failures */
1072 	if (task->shared_region == NULL) {
1073 		*task_snap_ss_flags |= kTaskSharedRegionNone;
1074 	} else if (task->shared_region == primary_system_shared_region) {
1075 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
1076 	} else {
1077 		*task_snap_ss_flags |= kTaskSharedRegionOther;
1078 	}
1079 
1080 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
1081 		struct vm_shared_region *sr = task->shared_region;
1082 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
1083 
1084 	} else {
1085 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
1086 		goto error_exit;
1087 	}
1088 
1089 	/* We haven't copied in the shared region UUID yet as part of setup */
1090 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
1091 		goto error_exit;
1092 	}
1093 
1094 
1095 	/*
1096 	 * No refcounting here, but we are in debugger context, so that should be safe.
1097 	 */
1098 	shared_cache_slide = task->shared_region->sr_slide;
1099 
1100 	if (task->shared_region == primary_system_shared_region) {
1101 		/* skip adding shared cache info -- it's the same as the system level one */
1102 		goto error_exit;
1103 	}
1104 
1105 	/*
1106 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
1107 	 * naming of both the structure and fields for this use wasn't great.  The
1108 	 * dyld_shared_cache_loadinfo structure has better names, but the same
1109 	 * layout and content as the original.
1110 	 *
1111 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
1112 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
1113 	 * entries; here, it's the slid first mapping, and we leave it that way
1114 	 * for backwards compatibility.
1115 	 */
1116 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
1117 	stackshot_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
1118 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
1119 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
1120 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
1121 
1122 error_exit:
1123 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1124 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
1125 	}
1126 
1127 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1128 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1129 	}
1130 
1131 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1132 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1133 	}
1134 
1135 	return error;
1136 }
1137 
1138 static kern_return_t
kcdata_record_uuid_info(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 * task_snap_ss_flags)1139 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
1140 {
1141 	boolean_t save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
1142 	boolean_t save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
1143 	boolean_t should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
1144 
1145 	kern_return_t error        = KERN_SUCCESS;
1146 	mach_vm_address_t out_addr = 0;
1147 
1148 	uint32_t uuid_info_count         = 0;
1149 	mach_vm_address_t uuid_info_addr = 0;
1150 	uint64_t uuid_info_timestamp     = 0;
1151 	uint32_t kdp_fault_results       = 0;
1152 
1153 
1154 	assert(task_snap_ss_flags != NULL);
1155 
1156 	int task_pid     = pid_from_task(task);
1157 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
1158 
1159 	if (save_loadinfo_p && have_pmap && task->active && task_pid > 0) {
1160 		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
1161 		if (task_64bit_addr) {
1162 			struct user64_dyld_all_image_infos task_image_infos;
1163 			if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1164 			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1165 				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
1166 				uuid_info_addr = task_image_infos.uuidArray;
1167 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1168 					uuid_info_timestamp = task_image_infos.timestamp;
1169 				}
1170 
1171 			}
1172 		} else {
1173 			struct user32_dyld_all_image_infos task_image_infos;
1174 			if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1175 			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1176 				uuid_info_count = task_image_infos.uuidArrayCount;
1177 				uuid_info_addr = task_image_infos.uuidArray;
1178 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1179 					uuid_info_timestamp = task_image_infos.timestamp;
1180 				}
1181 			}
1182 		}
1183 
1184 		/*
1185 		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
1186 		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
1187 		 * for this task.
1188 		 */
1189 		if (!uuid_info_addr) {
1190 			uuid_info_count = 0;
1191 		}
1192 
1193 
1194 	}
1195 
1196 	if (have_pmap && task_pid == 0) {
1197 		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
1198 			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
1199 		} else {
1200 			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
1201 		}
1202 	}
1203 
1204 	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
1205 		uint32_t copied_uuid_count = 0;
1206 		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
1207 		uint32_t uuid_info_array_size = 0;
1208 
1209 		/* Open a compression window to avoid overflowing the stack */
1210 		kcdata_compression_window_open(kcd);
1211 
1212 		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
1213 		if (uuid_info_count > 0) {
1214 			uuid_info_array_size = uuid_info_count * uuid_info_size;
1215 
1216 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1217 			    uuid_info_size, uuid_info_count, &out_addr));
1218 
1219 			if (!kdp_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
1220 				bzero((void *)out_addr, uuid_info_array_size);
1221 			} else {
1222 				copied_uuid_count = uuid_info_count;
1223 			}
1224 		}
1225 
1226 		uuid_t binary_uuid;
1227 		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
1228 			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
1229 			if (uuid_info_array_size == 0) {
1230 				/* We just need to store one UUID */
1231 				uuid_info_array_size = uuid_info_size;
1232 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1233 				    uuid_info_size, 1, &out_addr));
1234 			}
1235 
1236 			if (task_64bit_addr) {
1237 				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
1238 				uint64_t image_load_address = task->mach_header_vm_address;
1239 
1240 				stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1241 				stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1242 			} else {
1243 				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
1244 				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
1245 
1246 				stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1247 				stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1248 			}
1249 		}
1250 
1251 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
1252 	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
1253 		uintptr_t image_load_address;
1254 
1255 		do {
1256 #if defined(__arm__) || defined(__arm64__)
1257 			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
1258 				struct dyld_uuid_info_64 kc_uuid = {0};
1259 				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1260 				stackshot_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
1261 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
1262 				break;
1263 			}
1264 #endif /* defined(__arm__) || defined(__arm64__) */
1265 
1266 			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
1267 				/* Kernel UUID not found or inaccessible */
1268 				break;
1269 			}
1270 
1271 			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
1272 			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
1273 				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
1274 #if  defined(__arm64__)
1275 				kc_format_t primary_kc_type = KCFormatUnknown;
1276 				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
1277 					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
1278 					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
1279 				}
1280 #endif
1281 			}
1282 
1283 			/*
1284 			 * The element count of the array can vary - avoid overflowing the
1285 			 * stack by opening a window.
1286 			 */
1287 			kcdata_compression_window_open(kcd);
1288 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
1289 			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
1290 			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
1291 
1292 			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
1293 #if defined(__arm64__)
1294 			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
1295 				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
1296 				extern vm_offset_t segTEXTEXECB;
1297 				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
1298 			}
1299 #endif
1300 			uuid_info_array[0].imageLoadAddress = image_load_address;
1301 			stackshot_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
1302 
1303 			if (save_kextloadinfo_p &&
1304 			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
1305 			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
1306 			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
1307 				uint32_t kexti;
1308 				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
1309 					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
1310 #if defined(__arm64__)
1311 					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
1312 						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
1313 						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
1314 					}
1315 #endif
1316 					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
1317 					stackshot_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
1318 				}
1319 			}
1320 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
1321 		} while (0);
1322 	}
1323 
1324 error_exit:
1325 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1326 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
1327 	}
1328 
1329 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1330 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1331 	}
1332 
1333 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1334 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1335 	}
1336 
1337 	return error;
1338 }
1339 
1340 static kern_return_t
kcdata_record_task_iostats(kcdata_descriptor_t kcd,task_t task)1341 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
1342 {
1343 	kern_return_t error = KERN_SUCCESS;
1344 	mach_vm_address_t out_addr = 0;
1345 
1346 	/* I/O Statistics if any counters are non zero */
1347 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1348 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
1349 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
1350 		kcdata_compression_window_open(kcd);
1351 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1352 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1353 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
1354 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
1355 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
1356 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
1357 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
1358 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
1359 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
1360 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
1361 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
1362 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
1363 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
1364 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
1365 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1366 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
1367 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
1368 		}
1369 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
1370 	}
1371 
1372 
1373 error_exit:
1374 	return error;
1375 }
1376 
#if MONOTONIC
/*
 * Push a STACKSHOT_KCTYPE_INSTRS_CYCLES record holding the instruction and
 * cycle counts that mt_stackshot_task() reports for `task`.
 *
 * Returns the result of kcdata_push_data().
 */
static kern_return_t
kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
{
	uint64_t instructions;
	uint64_t cycles;
	struct instrs_cycles_snapshot snapshot = {0};

	/* collect into plain locals, then copy into the record */
	mt_stackshot_task(task, &instructions, &cycles);
	snapshot.ics_cycles = cycles;
	snapshot.ics_instructions = instructions;

	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(snapshot), &snapshot);
}
#endif /* MONOTONIC */
1392 
1393 static kern_return_t
kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd,task_t task)1394 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
1395 {
1396 	struct stackshot_cpu_architecture cpu_architecture = {0};
1397 	int32_t cputype;
1398 	int32_t cpusubtype;
1399 
1400 	proc_archinfo_kdp(task->bsd_info, &cputype, &cpusubtype);
1401 	cpu_architecture.cputype = cputype;
1402 	cpu_architecture.cpusubtype = cpusubtype;
1403 
1404 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
1405 }
1406 
1407 static kern_return_t
kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd,task_t task,unaligned_u64 task_snap_ss_flags,uint64_t transition_type)1408 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
1409 {
1410 	kern_return_t error                 = KERN_SUCCESS;
1411 	mach_vm_address_t out_addr          = 0;
1412 	struct transitioning_task_snapshot * cur_tsnap = NULL;
1413 
1414 	int task_pid           = pid_from_task(task);
1415 	/* Is returning -1 ok for terminating task ok ??? */
1416 	uint64_t task_uniqueid = get_task_uniqueid(task);
1417 
1418 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
1419 		/*
1420 		 * if this task is a transit task from another one, show the pid as
1421 		 * negative
1422 		 */
1423 		task_pid = 0 - task_pid;
1424 	}
1425 
1426 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
1427 	kcdata_compression_window_open(kcd);
1428 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
1429 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
1430 	bzero(cur_tsnap, sizeof(*cur_tsnap));
1431 
1432 	cur_tsnap->tts_unique_pid = task_uniqueid;
1433 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
1434 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
1435 	cur_tsnap->tts_transition_type = transition_type;
1436 	cur_tsnap->tts_pid = task_pid;
1437 
1438 	/* Add the BSD process identifiers */
1439 	if (task_pid != -1 && task->bsd_info != NULL) {
1440 		proc_name_kdp(task->bsd_info, cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
1441 	} else {
1442 		cur_tsnap->tts_p_comm[0] = '\0';
1443 	}
1444 
1445 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
1446 
1447 error_exit:
1448 	return error;
1449 }
1450 
1451 static kern_return_t
1452 #if STACKSHOT_COLLECTS_LATENCY_INFO
kcdata_record_task_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags,struct stackshot_latency_task * latency_info)1453 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
1454 #else
1455 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
1456 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1457 {
1458 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1459 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
1460 #if MONOTONIC
1461 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1462 #endif /* MONOTONIC */
1463 #if __arm__ || __arm64__
1464 	boolean_t collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
1465 #endif
1466 	boolean_t collect_pagetables       = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
1467 
1468 
1469 	kern_return_t error                 = KERN_SUCCESS;
1470 	mach_vm_address_t out_addr          = 0;
1471 	struct task_snapshot_v2 * cur_tsnap = NULL;
1472 #if STACKSHOT_COLLECTS_LATENCY_INFO
1473 	latency_info->cur_tsnap_latency = mach_absolute_time();
1474 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1475 
1476 	int task_pid           = pid_from_task(task);
1477 	uint64_t task_uniqueid = get_task_uniqueid(task);
1478 	uint64_t proc_starttime_secs = 0;
1479 
1480 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
1481 		/*
1482 		 * if this task is a transit task from another one, show the pid as
1483 		 * negative
1484 		 */
1485 		task_pid = 0 - task_pid;
1486 	}
1487 
1488 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
1489 	kcdata_compression_window_open(kcd);
1490 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
1491 	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
1492 	bzero(cur_tsnap, sizeof(*cur_tsnap));
1493 
1494 	cur_tsnap->ts_unique_pid = task_uniqueid;
1495 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
1496 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
1497 	cur_tsnap->ts_user_time_in_terminated_threads = task->total_user_time;
1498 	cur_tsnap->ts_system_time_in_terminated_threads = task->total_system_time;
1499 
1500 	proc_starttime_kdp(task->bsd_info, &proc_starttime_secs, NULL, NULL);
1501 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
1502 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1503 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
1504 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
1505 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
1506 
1507 	cur_tsnap->ts_suspend_count = task->suspend_count;
1508 	cur_tsnap->ts_faults = counter_load(&task->faults);
1509 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
1510 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
1511 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
1512 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1513 	cur_tsnap->ts_pid = task_pid;
1514 
1515 	/* Add the BSD process identifiers */
1516 	if (task_pid != -1 && task->bsd_info != NULL) {
1517 		proc_name_kdp(task->bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
1518 	} else {
1519 		cur_tsnap->ts_p_comm[0] = '\0';
1520 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
1521 		if (task->task_imp_base != NULL) {
1522 			_stackshot_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
1523 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
1524 		}
1525 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
1526 	}
1527 
1528 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
1529 
1530 #if CONFIG_COALITIONS
1531 	if (task_pid != -1 && task->bsd_info != NULL &&
1532 	    ((trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) && (task->coalition[COALITION_TYPE_JETSAM] != NULL))) {
1533 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
1534 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
1535 	}
1536 #endif /* CONFIG_COALITIONS */
1537 
1538 #if __arm__ || __arm64__
1539 	if (collect_asid && have_pmap) {
1540 		uint32_t asid = PMAP_VASID(task->map->pmap);
1541 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
1542 	}
1543 #endif
1544 
1545 #if STACKSHOT_COLLECTS_LATENCY_INFO
1546 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
1547 	latency_info->pmap_latency = mach_absolute_time();
1548 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1549 
1550 	if (collect_pagetables && have_pmap) {
1551 #if INTERRUPT_MASKED_DEBUG
1552 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
1553 		ml_spin_debug_clear_self();
1554 #endif
1555 		size_t bytes_dumped = 0;
1556 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stack_snapshot_pagetable_mask, &bytes_dumped);
1557 		if (error != KERN_SUCCESS) {
1558 			goto error_exit;
1559 		} else {
1560 			/* Variable size array - better not have it on the stack. */
1561 			kcdata_compression_window_open(kcd);
1562 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
1563 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
1564 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
1565 		}
1566 	}
1567 
1568 #if STACKSHOT_COLLECTS_LATENCY_INFO
1569 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
1570 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
1571 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1572 
1573 #if STACKSHOT_COLLECTS_LATENCY_INFO
1574 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
1575 	latency_info->end_latency = mach_absolute_time();
1576 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1577 
1578 	if (collect_iostats) {
1579 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
1580 	}
1581 
1582 #if MONOTONIC
1583 	if (collect_instrs_cycles) {
1584 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1585 	}
1586 #endif /* MONOTONIC */
1587 
1588 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
1589 
1590 #if STACKSHOT_COLLECTS_LATENCY_INFO
1591 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
1592 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1593 
1594 error_exit:
1595 	return error;
1596 }
1597 
1598 static kern_return_t
kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags)1599 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
1600 {
1601 #if !MONOTONIC
1602 #pragma unused(trace_flags)
1603 #endif /* !MONOTONIC */
1604 	kern_return_t error                       = KERN_SUCCESS;
1605 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
1606 	mach_vm_address_t out_addr                = 0;
1607 	(void) trace_flags;
1608 #if __arm__ || __arm64__
1609 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
1610 #endif
1611 #if MONOTONIC
1612 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1613 #endif /* MONOTONIC */
1614 
1615 	uint64_t task_uniqueid = get_task_uniqueid(task);
1616 
1617 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
1618 
1619 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
1620 
1621 	cur_tsnap->tds_unique_pid = task_uniqueid;
1622 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
1623 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
1624 
1625 	cur_tsnap->tds_user_time_in_terminated_threads = task->total_user_time;
1626 	cur_tsnap->tds_system_time_in_terminated_threads = task->total_system_time;
1627 
1628 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1629 
1630 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
1631 	cur_tsnap->tds_suspend_count = task->suspend_count;
1632 	cur_tsnap->tds_faults            = counter_load(&task->faults);
1633 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
1634 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
1635 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
1636 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
1637 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
1638 	    ? LATENCY_QOS_TIER_UNSPECIFIED
1639 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1640 
1641 #if __arm__ || __arm64__
1642 	if (collect_asid && have_pmap) {
1643 		uint32_t asid = PMAP_VASID(task->map->pmap);
1644 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
1645 		stackshot_memcpy((void*)out_addr, &asid, sizeof(asid));
1646 	}
1647 #endif
1648 
1649 #if MONOTONIC
1650 	if (collect_instrs_cycles) {
1651 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1652 	}
1653 #endif /* MONOTONIC */
1654 
1655 error_exit:
1656 	return error;
1657 }
1658 
1659 static kern_return_t
kcdata_record_thread_iostats(kcdata_descriptor_t kcd,thread_t thread)1660 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
1661 {
1662 	kern_return_t error = KERN_SUCCESS;
1663 	mach_vm_address_t out_addr = 0;
1664 
1665 	/* I/O Statistics */
1666 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1667 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
1668 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1669 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1670 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
1671 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
1672 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
1673 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
1674 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
1675 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
1676 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
1677 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
1678 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
1679 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
1680 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
1681 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
1682 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1683 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
1684 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
1685 		}
1686 	}
1687 
1688 error_exit:
1689 	return error;
1690 }
1691 
1692 bool
machine_trace_thread_validate_kva(vm_offset_t addr)1693 machine_trace_thread_validate_kva(vm_offset_t addr)
1694 {
1695 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
1696 }
1697 
/*
 * State shared across calls to _stackshot_backtrace_copy() while one user
 * backtrace is being collected.
 */
1698 struct _stackshot_backtrace_context {
	/* Map passed to _stackshot_get_page_size() and kdp_find_phys(). */
1699 	vm_map_t sbc_map;
	/* Page-aligned user address of the most recently translated page. */
1700 	vm_offset_t sbc_prev_page;
	/* Page-aligned kernel virtual address corresponding to sbc_prev_page. */
1701 	vm_offset_t sbc_prev_kva;
	/* Accumulated kThread*BT flags derived from kdp_find_phys() results. */
1702 	uint32_t sbc_flags;
	/* Passed through to kdp_find_phys() as its faulting argument. */
1703 	bool sbc_allow_faulting;
1704 };
1705 
1706 static errno_t
_stackshot_backtrace_copy(void * vctx,void * dst,user_addr_t src,size_t size)1707 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
1708 {
1709 	struct _stackshot_backtrace_context *ctx = vctx;
1710 	size_t map_page_mask = 0;
1711 	size_t __assert_only map_page_size = _stackshot_get_page_size(ctx->sbc_map,
1712 	    &map_page_mask);
1713 	assert(size < map_page_size);
1714 	if (src & (size - 1)) {
1715 		// The source should be aligned to the size passed in, like a stack
1716 		// frame or word.
1717 		return EINVAL;
1718 	}
1719 
1720 	vm_offset_t src_page = src & ~map_page_mask;
1721 	vm_offset_t src_kva = 0;
1722 
1723 	if (src_page != ctx->sbc_prev_page) {
1724 		uint32_t res = 0;
1725 		uint32_t flags = 0;
1726 		vm_offset_t src_pa = kdp_find_phys(ctx->sbc_map, src,
1727 		    ctx->sbc_allow_faulting, &res);
1728 
1729 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
1730 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
1731 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
1732 		ctx->sbc_flags |= flags;
1733 		if (src_pa == 0) {
1734 			return EFAULT;
1735 		}
1736 
1737 		src_kva = phystokv(src_pa);
1738 		ctx->sbc_prev_page = src_page;
1739 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
1740 	} else {
1741 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
1742 	}
1743 
1744 #if KASAN
1745 	kasan_notify_address(src_kva, size);
1746 #endif
1747 	memcpy(dst, (const void *)src_kva, size);
1748 
1749 	return 0;
1750 }
1751 
/*
 * Record a full snapshot of one thread into the kcdata buffer.
 *
 * Items are emitted in this order:
 *   - STACKSHOT_KCTYPE_THREAD_SNAPSHOT (struct thread_snapshot_v4)
 *   - the dispatch queue label, when it could be copied in
 *   - STACKSHOT_KCTYPE_THREAD_NAME, when the thread has a non-empty name
 *   - STACKSHOT_KCTYPE_CPU_TIMES
 *   - the user backtrace (plus the async backtrace on LP64), when collected
 *   - the kernel backtrace, when the thread has a kernel stack
 *   - optional thread group, I/O statistics, instruction/cycle counts and
 *     latency information, depending on configuration and trace_flags
 *
 * kcd            - destination kcdata descriptor
 * thread         - thread being recorded
 * task           - task owning the thread; used for the dispatch queue
 *                  copyins and as the source of the user map
 * trace_flags    - STACKSHOT_* request flags
 * have_pmap      - must be set for the dispatch queue copyins to be attempted
 * thread_on_core - reported as kThreadOnCore in the snapshot flags
 *
 * Returns KERN_SUCCESS, KERN_RESOURCE_SHORTAGE when there is no room left for
 * even one user stack frame, or whatever kcd_exit_on_error leaves in `error`.
 * NOTE(review): kcd_exit_on_error is defined outside this block; it is
 * assumed to store the failing status in `error` and jump to error_exit.
 */
1752 static kern_return_t
kcdata_record_thread_snapshot(kcdata_descriptor_t kcd,thread_t thread,task_t task,uint64_t trace_flags,boolean_t have_pmap,boolean_t thread_on_core)1753 kcdata_record_thread_snapshot(
1754 	kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
1755 {
1756 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
1757 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
1758 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	/* I/O statistics are skipped for delta stackshots and when explicitly disabled. */
1759 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
1760 #if MONOTONIC
1761 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1762 #endif /* MONOTONIC */
1763 	kern_return_t error        = KERN_SUCCESS;
1764 
1765 #if STACKSHOT_COLLECTS_LATENCY_INFO
	/* Each *_latency field holds a start timestamp until it is replaced by the elapsed time. */
1766 	struct stackshot_latency_thread latency_info;
1767 	latency_info.cur_thsnap1_latency = mach_absolute_time();
1768 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1769 
1770 	mach_vm_address_t out_addr = 0;
1771 	int saved_count            = 0;
1772 
1773 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
1774 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
1775 	uint64_t tval    = 0;
1776 
1777 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
1778 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
1779 
1780 	/* Populate the thread snapshot header */
1781 	cur_thread_snap->ths_ss_flags = 0;
1782 	cur_thread_snap->ths_thread_id = thread_tid(thread);
1783 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
1784 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
1785 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
1786 
1787 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
1788 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
1789 	} else {
1790 		cur_thread_snap->ths_voucher_identifier = 0;
1791 	}
1792 
1793 #if STACKSHOT_COLLECTS_LATENCY_INFO
1794 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
1795 	latency_info.dispatch_serial_latency = mach_absolute_time();
1796 	latency_info.dispatch_label_latency = 0;
1797 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1798 
	/*
	 * Dispatch queue information is read from the task's address space:
	 * first the queue address, then the queue's serial number, then (if the
	 * task advertises a label offset) the label string.
	 */
1799 	cur_thread_snap->ths_dqserialnum = 0;
1800 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
1801 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
1802 		if (dqkeyaddr != 0) {
1803 			uint64_t dqaddr = 0;
1804 			boolean_t copyin_ok = kdp_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
1805 			if (copyin_ok && dqaddr != 0) {
1806 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
1807 				uint64_t dqserialnum = 0;
1808 				copyin_ok = kdp_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
1809 				if (copyin_ok) {
1810 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
1811 					cur_thread_snap->ths_dqserialnum = dqserialnum;
1812 				}
1813 
1814 #if STACKSHOT_COLLECTS_LATENCY_INFO
1815 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
1816 				latency_info.dispatch_label_latency = mach_absolute_time();
1817 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1818 
1819 				/* try copying in the queue label */
1820 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
1821 				if (label_offs) {
1822 					uint64_t dqlabeladdr = dqaddr + label_offs;
1823 					uint64_t actual_dqlabeladdr = 0;
1824 
1825 					copyin_ok = kdp_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
1826 					if (copyin_ok && actual_dqlabeladdr != 0) {
1827 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
1828 						int len;
1829 
1830 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
1831 						len = kdp_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
						/* NOTE(review): len is used as both the item size and the strlcpy bound; presumably it counts the terminating NUL -- confirm against kdp_copyin_string. */
1832 						if (len > 0) {
1833 							mach_vm_address_t label_addr = 0;
1834 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
1835 							_stackshot_strlcpy((char*)label_addr, &label_buf[0], len);
1836 						}
1837 					}
1838 				}
1839 #if STACKSHOT_COLLECTS_LATENCY_INFO
1840 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
1841 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1842 			}
1843 		}
1844 	}
1845 
1846 #if STACKSHOT_COLLECTS_LATENCY_INFO
	/* Without a serial number the field still holds a start timestamp, not a duration; report 0 instead. */
1847 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
1848 		latency_info.dispatch_serial_latency = 0;
1849 	}
1850 	latency_info.cur_thsnap2_latency = mach_absolute_time();
1851 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1852 
1853 	tval = safe_grab_timer_value(&thread->user_timer);
1854 	cur_thread_snap->ths_user_time = tval;
1855 	tval = safe_grab_timer_value(&thread->system_timer);
1856 
	/* Without precise user/kernel accounting, system time is folded into user time. */
1857 	if (thread->precise_user_kernel_time) {
1858 		cur_thread_snap->ths_sys_time = tval;
1859 	} else {
1860 		cur_thread_snap->ths_user_time += tval;
1861 		cur_thread_snap->ths_sys_time = 0;
1862 	}
1863 
1864 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
1865 		cur_thread_snap->ths_ss_flags |= kThreadMain;
1866 	}
1867 	if (thread->effective_policy.thep_darwinbg) {
1868 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
1869 	}
1870 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
1871 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
1872 	}
1873 	if (thread->suspend_count > 0) {
1874 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
1875 	}
1876 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1877 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
1878 	}
1879 	if (thread_on_core) {
1880 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
1881 	}
1882 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
1883 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
1884 	}
1885 
1886 	/* make sure state flags defined in kcdata.h still match internal flags */
1887 	static_assert(SS_TH_WAIT == TH_WAIT);
1888 	static_assert(SS_TH_SUSP == TH_SUSP);
1889 	static_assert(SS_TH_RUN == TH_RUN);
1890 	static_assert(SS_TH_UNINT == TH_UNINT);
1891 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
1892 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
1893 	static_assert(SS_TH_IDLE == TH_IDLE);
1894 
1895 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
1896 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
1897 	cur_thread_snap->ths_state                   = thread->state;
1898 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
1899 	cur_thread_snap->ths_base_priority = thread->base_pri;
1900 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
1901 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
1902 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
1903 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
1904 	    thread->requested_policy.thrp_qos_workq_override);
1905 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
1906 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
1907 
	/* The policy structures are copied out as raw 64-bit values; the asserts guard their size. */
1908 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
1909 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
1910 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
1911 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
1912 
1913 #if STACKSHOT_COLLECTS_LATENCY_INFO
1914 	latency_info.cur_thsnap2_latency = mach_absolute_time()  - latency_info.cur_thsnap2_latency;
1915 	latency_info.thread_name_latency = mach_absolute_time();
1916 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1917 
1918 	/* if there is thread name then add to buffer */
1919 	cur_thread_name[0] = '\0';
1920 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
	/* The item is always the full fixed-size name buffer, not just the string length. */
1921 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
1922 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
1923 		stackshot_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
1924 	}
1925 
1926 #if STACKSHOT_COLLECTS_LATENCY_INFO
1927 	latency_info.thread_name_latency = mach_absolute_time()  - latency_info.thread_name_latency;
1928 	latency_info.sur_times_latency = mach_absolute_time();
1929 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1930 
1931 	/* record system, user, and runnable times */
1932 	time_value_t user_time, system_time, runnable_time;
1933 	thread_read_times(thread, &user_time, &system_time, &runnable_time);
1934 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
1935 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
	/* Each time_value_t is flattened to microseconds. */
1936 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
1937 		.user_usec = (uint64_t)user_time.seconds * USEC_PER_SEC + user_time.microseconds,
1938 		.system_usec = (uint64_t)system_time.seconds * USEC_PER_SEC + system_time.microseconds,
1939 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
1940 	};
1941 
1942 #if STACKSHOT_COLLECTS_LATENCY_INFO
1943 	latency_info.sur_times_latency = mach_absolute_time()  - latency_info.sur_times_latency;
1944 	latency_info.user_stack_latency = mach_absolute_time();
1945 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1946 
1947 	/* Trace user stack, if any */
1948 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
1949 		uint32_t user_ths_ss_flags = 0;
1950 
1951 		/*
1952 		 * This relies on knowing the "end" address points to the start of the
1953 		 * next elements data and, in the case of arrays, the elements.
1954 		 */
1955 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
1956 		mach_vm_address_t max_addr = (mach_vm_address_t)kcd_max_address(kcd);
1957 		assert(out_addr <= max_addr);
		/* Frames are written straight into the free tail of the buffer, capped at MAX_FRAMES. */
1958 		size_t avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
1959 		size_t max_frames = MIN(avail_frames, MAX_FRAMES);
1960 		if (max_frames == 0) {
1961 			error = KERN_RESOURCE_SHORTAGE;
1962 			goto error_exit;
1963 		}
		/* The -1 values are initial "no page translated yet" placeholders for the copy callback's cache. */
1964 		struct _stackshot_backtrace_context ctx = {
1965 			.sbc_map = task->map,
1966 			.sbc_allow_faulting = stack_enable_faulting,
1967 			.sbc_prev_page = -1,
1968 			.sbc_prev_kva = -1,
1969 		};
1970 		struct backtrace_control ctl = {
1971 			.btc_user_thread = thread,
1972 			.btc_user_copy = _stackshot_backtrace_copy,
1973 			.btc_user_copy_context = &ctx,
1974 		};
1975 		struct backtrace_user_info info = BTUINFO_INIT;
1976 
1977 		saved_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
1978 		    &info);
1979 		if (saved_count > 0) {
1980 #if __LP64__
1981 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
1982 #else // __LP64__
1983 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
1984 #endif // !__LP64__
			/* Claim the space the frames were already written into. */
1985 			mach_vm_address_t out_addr_array;
1986 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
1987 			    STACKLR_WORDS, sizeof(uintptr_t), saved_count,
1988 			    &out_addr_array));
1989 			/*
1990 			 * Ensure the kcd_end_address (above) trick worked.
1991 			 */
1992 			assert(out_addr == out_addr_array);
1993 			if (info.btui_info & BTI_64_BIT) {
1994 				user_ths_ss_flags |= kUser64_p;
1995 			}
1996 			if ((info.btui_info & BTI_TRUNCATED) ||
1997 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
1998 				user_ths_ss_flags |= kThreadTruncatedBT;
1999 				user_ths_ss_flags |= kThreadTruncUserBT;
2000 			}
2001 			user_ths_ss_flags |= ctx.sbc_flags;
			/* Reset so the truncation check for the async backtrace only sees flags raised by that pass. */
2002 			ctx.sbc_flags = 0;
2003 #if __LP64__
2004 			/* We only support async stacks on 64-bit kernels */
2005 			if (info.btui_async_frame_addr != 0) {
2006 				uint32_t async_start_offset = info.btui_async_start_index;
2007 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
2008 				    sizeof(async_start_offset), &async_start_offset));
				/* Same write-then-claim approach as the synchronous backtrace above. */
2009 				out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2010 				assert(out_addr <= max_addr);
2011 
2012 				avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
2013 				max_frames = MIN(avail_frames, MAX_FRAMES);
2014 				if (max_frames == 0) {
2015 					error = KERN_RESOURCE_SHORTAGE;
2016 					goto error_exit;
2017 				}
2018 				ctl.btc_frame_addr = info.btui_async_frame_addr;
2019 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
2020 				info = BTUINFO_INIT;
2021 				unsigned int async_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
2022 				    &info);
2023 				if (async_count > 0) {
2024 					mach_vm_address_t async_out_addr;
2025 					kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
2026 					    STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64, sizeof(uintptr_t), async_count,
2027 					    &async_out_addr));
2028 					/*
2029 					 * Ensure the kcd_end_address (above) trick worked.
2030 					 */
2031 					assert(out_addr == async_out_addr);
2032 					if ((info.btui_info & BTI_TRUNCATED) ||
2033 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
2034 						user_ths_ss_flags |= kThreadTruncatedBT;
2035 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
2036 					}
2037 					user_ths_ss_flags |= ctx.sbc_flags;
2038 				}
2039 			}
2040 #endif /* __LP64__ */
2041 		}
2042 		if (user_ths_ss_flags != 0) {
2043 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
2044 		}
2045 	}
2046 
2047 #if STACKSHOT_COLLECTS_LATENCY_INFO
2048 	latency_info.user_stack_latency = mach_absolute_time()  - latency_info.user_stack_latency;
2049 	latency_info.kernel_stack_latency = mach_absolute_time();
2050 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2051 
2052 	/* Call through to the machine specific trace routines
2053 	 * Frames are added past the snapshot header.
2054 	 */
2055 	if (thread->kernel_stack != 0) {
2056 		uint32_t kern_ths_ss_flags = 0;
2057 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
		/* The #if below only selects the item type and trace routine; the argument list after #endif is shared. */
2058 #if defined(__LP64__)
2059 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
2060 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
2061 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2062 		saved_count = machine_trace_thread64(
2063 #else
2064 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
2065 		extern int machine_trace_thread(thread_t thread, char *tracepos,
2066 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2067 		saved_count = machine_trace_thread(
2068 #endif
2069 			thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES,
2070 			&kern_ths_ss_flags);
2071 		if (saved_count > 0) {
2072 			int frame_size = sizeof(uintptr_t);
2073 #if defined(__LP64__)
2074 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
2075 #endif
			/* saved_count is divided by the frame size here, i.e. it is treated as a byte count. */
2076 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, stack_kcdata_type,
2077 			    frame_size, saved_count / frame_size, &out_addr));
2078 		}
2079 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
2080 			kern_ths_ss_flags |= kThreadTruncKernBT;
2081 		}
2082 		if (kern_ths_ss_flags != 0) {
2083 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
2084 		}
2085 	}
2086 
2087 #if STACKSHOT_COLLECTS_LATENCY_INFO
2088 	latency_info.kernel_stack_latency = mach_absolute_time()  - latency_info.kernel_stack_latency;
2089 	latency_info.misc_latency = mach_absolute_time();
2090 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2091 
2092 #if CONFIG_THREAD_GROUPS
	/* A thread without a thread group is reported with id 0. */
2093 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
2094 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
2095 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
2096 		stackshot_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
2097 	}
2098 #endif /* CONFIG_THREAD_GROUPS */
2099 
2100 	if (collect_iostats) {
2101 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
2102 	}
2103 
2104 #if MONOTONIC
2105 	if (collect_instrs_cycles) {
2106 		uint64_t instrs = 0, cycles = 0;
2107 		mt_stackshot_thread(thread, &instrs, &cycles);
2108 
2109 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
2110 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
2111 		    instrs_cycles->ics_instructions = instrs;
2112 		    instrs_cycles->ics_cycles = cycles;
2113 	}
2114 #endif /* MONOTONIC */
2115 
2116 #if STACKSHOT_COLLECTS_LATENCY_INFO
2117 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
	/* Latency data is only emitted on success paths; any earlier failure jumps past this point. */
2118 	if (collect_latency_info) {
2119 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
2120 	}
2121 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2122 
2123 error_exit:
2124 	return error;
2125 }
2126 
2127 static int
kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap,thread_t thread,boolean_t thread_on_core)2128 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
2129 {
2130 	cur_thread_snap->tds_thread_id = thread_tid(thread);
2131 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
2132 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
2133 	} else {
2134 		cur_thread_snap->tds_voucher_identifier = 0;
2135 	}
2136 
2137 	cur_thread_snap->tds_ss_flags = 0;
2138 	if (thread->effective_policy.thep_darwinbg) {
2139 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
2140 	}
2141 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
2142 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
2143 	}
2144 	if (thread->suspend_count > 0) {
2145 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
2146 	}
2147 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
2148 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
2149 	}
2150 	if (thread_on_core) {
2151 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
2152 	}
2153 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
2154 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
2155 	}
2156 
2157 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
2158 	cur_thread_snap->tds_state                   = thread->state;
2159 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
2160 	cur_thread_snap->tds_base_priority           = thread->base_pri;
2161 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
2162 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
2163 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
2164 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
2165 	    thread->requested_policy.thrp_qos_workq_override);
2166 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
2167 
2168 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
2169 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
2170 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
2171 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
2172 
2173 	return 0;
2174 }
2175 
2176 /*
2177  * Why 12?  12 strikes a decent balance between allocating a large array on
2178  * the stack and having large kcdata item overheads for recording nonrunnable
2179  * tasks.
 *
 * This is the capacity of the ids[] array in struct saved_uniqueids.
2180  */
2181 #define UNIQUEIDSPERFLUSH 12
2182 
/*
 * Fixed-capacity batch of 64-bit ids, holding at most UNIQUEIDSPERFLUSH
 * entries.
 */
2183 struct saved_uniqueids {
	/* Storage for the batched ids. */
2184 	uint64_t ids[UNIQUEIDSPERFLUSH];
	/* NOTE(review): presumably the number of valid entries in ids[]; the users are outside this view -- confirm. */
2185 	unsigned count;
2186 };
2187 
/* Result of classify_thread(): which kind of snapshot to record for a thread. */
2188 enum thread_classification {
2189 	tc_full_snapshot,  /* take a full snapshot */
2190 	tc_delta_snapshot, /* take a delta snapshot */
2191 };
2192 
2193 static enum thread_classification
classify_thread(thread_t thread,boolean_t * thread_on_core_p,boolean_t collect_delta_stackshot)2194 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
2195 {
2196 	processor_t last_processor = thread->last_processor;
2197 
2198 	boolean_t thread_on_core =
2199 	    (last_processor != PROCESSOR_NULL &&
2200 	    (last_processor->state == PROCESSOR_SHUTDOWN || last_processor->state == PROCESSOR_RUNNING) &&
2201 	    last_processor->active_thread == thread);
2202 
2203 	*thread_on_core_p = thread_on_core;
2204 
2205 	/* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
2206 	 * previous full stackshot */
2207 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stack_snapshot_delta_since_timestamp)) {
2208 		return tc_full_snapshot;
2209 	} else {
2210 		return tc_delta_snapshot;
2211 	}
2212 }
2213 
/* Per-request parameters handed to kdp_stackshot_record_task(). */
2214 struct stackshot_context {
	/* NOTE(review): presumably the pid the request targets; not read in the visible code -- confirm. */
2215 	int pid;
	/* STACKSHOT_* request flags; kdp_stackshot_record_task() tests individual bits. */
2216 	uint64_t trace_flags;
2217 };
2218 
2219 static kern_return_t
kdp_stackshot_record_task(struct stackshot_context * ctx,task_t task)2220 kdp_stackshot_record_task(struct stackshot_context *ctx, task_t task)
2221 {
2222 	boolean_t active_kthreads_only_p  = ((ctx->trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
2223 	boolean_t save_donating_pids_p    = ((ctx->trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
2224 	boolean_t collect_delta_stackshot = ((ctx->trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2225 	boolean_t save_owner_info         = ((ctx->trace_flags & STACKSHOT_THREAD_WAITINFO) != 0);
2226 
2227 	kern_return_t error = KERN_SUCCESS;
2228 	mach_vm_address_t out_addr = 0;
2229 	int saved_count = 0;
2230 
2231 	int task_pid                   = 0;
2232 	uint64_t task_uniqueid         = 0;
2233 	int num_delta_thread_snapshots = 0;
2234 	int num_waitinfo_threads       = 0;
2235 	int num_turnstileinfo_threads  = 0;
2236 
2237 	uint64_t task_start_abstime    = 0;
2238 	boolean_t have_map = FALSE, have_pmap = FALSE;
2239 	boolean_t some_thread_ran = FALSE;
2240 	unaligned_u64 task_snap_ss_flags = 0;
2241 
2242 #if STACKSHOT_COLLECTS_LATENCY_INFO
2243 	struct stackshot_latency_task latency_info;
2244 	latency_info.setup_latency = mach_absolute_time();
2245 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2246 
2247 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2248 	uint64_t task_begin_cpu_cycle_count = 0;
2249 	if (!panic_stackshot) {
2250 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2251 	}
2252 #endif
2253 
2254 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
2255 		error = KERN_FAILURE;
2256 		goto error_exit;
2257 	}
2258 
2259 	boolean_t task_in_teardown        = (task->bsd_info == NULL) || proc_in_teardown(task->bsd_info);// has P_LPEXIT set during proc_exit()
2260 	boolean_t task_in_transition      = task_in_teardown;         // here we can add other types of transition.
2261 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
2262 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
2263 
2264 	if (task_in_transition) {
2265 		collect_delta_stackshot = FALSE;
2266 	}
2267 
2268 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
2269 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
2270 
2271 	task_pid = pid_from_task(task);
2272 	/* Is returning -1 ok for terminating task ok ??? */
2273 	task_uniqueid = get_task_uniqueid(task);
2274 
2275 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
2276 		/*
2277 		 * Not interested in terminated tasks without threads.
2278 		 */
2279 		if (queue_empty(&task->threads) || task_pid == -1) {
2280 			return KERN_SUCCESS;
2281 		}
2282 	}
2283 
2284 	/* All PIDs should have the MSB unset */
2285 	assert((task_pid & (1ULL << 31)) == 0);
2286 
2287 #if STACKSHOT_COLLECTS_LATENCY_INFO
2288 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
2289 	latency_info.task_uniqueid = task_uniqueid;
2290 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2291 
2292 	/* Trace everything, unless a process was specified */
2293 	if ((ctx->pid == -1) || (ctx->pid == task_pid)) {
2294 		/* add task snapshot marker */
2295 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
2296 		    container_type, task_uniqueid));
2297 
2298 		if (collect_delta_stackshot) {
2299 			/*
2300 			 * For delta stackshots we need to know if a thread from this task has run since the
2301 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
2302 			 */
2303 			thread_t thread = THREAD_NULL;
2304 			queue_iterate(&task->threads, thread, thread_t, task_threads)
2305 			{
2306 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
2307 					error = KERN_FAILURE;
2308 					goto error_exit;
2309 				}
2310 
2311 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
2312 					continue;
2313 				}
2314 
2315 				boolean_t thread_on_core;
2316 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
2317 
2318 				switch (thread_classification) {
2319 				case tc_full_snapshot:
2320 					some_thread_ran = TRUE;
2321 					break;
2322 				case tc_delta_snapshot:
2323 					num_delta_thread_snapshots++;
2324 					break;
2325 				}
2326 			}
2327 		}
2328 
2329 		if (collect_delta_stackshot) {
2330 			proc_starttime_kdp(task->bsd_info, NULL, NULL, &task_start_abstime);
2331 		}
2332 
2333 		/* Next record any relevant UUID info and store the task snapshot */
2334 		if (task_in_transition ||
2335 		    !collect_delta_stackshot ||
2336 		    (task_start_abstime == 0) ||
2337 		    (task_start_abstime > stack_snapshot_delta_since_timestamp) ||
2338 		    some_thread_ran) {
2339 			/*
2340 			 * Collect full task information in these scenarios:
2341 			 *
2342 			 * 1) a full stackshot or the task is in transition
2343 			 * 2) a delta stackshot where the task started after the previous full stackshot
2344 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
2345 			 *
2346 			 * because the task may have exec'ed, changing its name, architecture, load info, etc
2347 			 */
2348 
2349 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
2350 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
2351 #if STACKSHOT_COLLECTS_LATENCY_INFO
2352 			if (!task_in_transition) {
2353 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags, &latency_info));
2354 			} else {
2355 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
2356 			}
2357 #else
2358 			if (!task_in_transition) {
2359 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
2360 			} else {
2361 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
2362 			}
2363 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2364 		} else {
2365 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
2366 		}
2367 
2368 #if STACKSHOT_COLLECTS_LATENCY_INFO
2369 		latency_info.misc_latency = mach_absolute_time();
2370 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2371 
2372 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
2373 		int current_delta_snapshot_index                  = 0;
2374 		if (num_delta_thread_snapshots > 0) {
2375 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
2376 			    sizeof(struct thread_delta_snapshot_v3),
2377 			    num_delta_thread_snapshots, &out_addr));
2378 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
2379 		}
2380 
2381 
2382 #if STACKSHOT_COLLECTS_LATENCY_INFO
2383 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
2384 #endif
2385 		/*
2386 		 * Iterate over the task threads to save thread snapshots and determine
2387 		 * how much space we need for waitinfo and turnstile info
2388 		 */
2389 		thread_t thread = THREAD_NULL;
2390 		queue_iterate(&task->threads, thread, thread_t, task_threads)
2391 		{
2392 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
2393 				error = KERN_FAILURE;
2394 				goto error_exit;
2395 			}
2396 
2397 			uint64_t thread_uniqueid;
2398 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
2399 				continue;
2400 			}
2401 			thread_uniqueid = thread_tid(thread);
2402 
2403 			boolean_t thread_on_core;
2404 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
2405 
2406 			switch (thread_classification) {
2407 			case tc_full_snapshot:
2408 				/* add thread marker */
2409 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
2410 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
2411 
2412 				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
2413 				kcdata_compression_window_open(stackshot_kcdata_p);
2414 
2415 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, ctx->trace_flags, have_pmap, thread_on_core));
2416 
2417 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2418 
2419 				/* mark end of thread snapshot data */
2420 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
2421 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
2422 				break;
2423 			case tc_delta_snapshot:
2424 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
2425 				break;
2426 			}
2427 
2428 			/*
2429 			 * We want to report owner information regardless of whether a thread
2430 			 * has changed since the last delta, whether it's a normal stackshot,
2431 			 * or whether it's nonrunnable
2432 			 */
2433 			if (save_owner_info) {
2434 				if (stackshot_thread_has_valid_waitinfo(thread)) {
2435 					num_waitinfo_threads++;
2436 				}
2437 
2438 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
2439 					num_turnstileinfo_threads++;
2440 				}
2441 			}
2442 		}
2443 #if STACKSHOT_COLLECTS_LATENCY_INFO
2444 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
2445 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2446 
2447 
2448 		thread_waitinfo_t *thread_waitinfo           = NULL;
2449 		thread_turnstileinfo_t *thread_turnstileinfo = NULL;
2450 		int current_waitinfo_index              = 0;
2451 		int current_turnstileinfo_index         = 0;
2452 		/* allocate space for the wait and turnstil info */
2453 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
2454 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
2455 			kcdata_compression_window_open(stackshot_kcdata_p);
2456 
2457 			if (num_waitinfo_threads > 0) {
2458 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
2459 				    sizeof(thread_waitinfo_t), num_waitinfo_threads, &out_addr));
2460 				thread_waitinfo = (thread_waitinfo_t *)out_addr;
2461 			}
2462 
2463 			if (num_turnstileinfo_threads > 0) {
2464 				/* get space for the turnstile info */
2465 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
2466 				    sizeof(thread_turnstileinfo_t), num_turnstileinfo_threads, &out_addr));
2467 				thread_turnstileinfo = (thread_turnstileinfo_t *)out_addr;
2468 			}
2469 		}
2470 
2471 #if STACKSHOT_COLLECTS_LATENCY_INFO
2472 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
2473 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
2474 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2475 
2476 		/* Iterate over the task's threads to save the wait and turnstile info */
2477 		queue_iterate(&task->threads, thread, thread_t, task_threads)
2478 		{
2479 			uint64_t thread_uniqueid;
2480 
2481 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
2482 				continue;
2483 			}
2484 
2485 			thread_uniqueid = thread_tid(thread);
2486 
2487 			/* If we want owner info, we should capture it regardless of its classification */
2488 			if (save_owner_info) {
2489 				if (stackshot_thread_has_valid_waitinfo(thread)) {
2490 					stackshot_thread_wait_owner_info(
2491 						thread,
2492 						&thread_waitinfo[current_waitinfo_index++]);
2493 				}
2494 
2495 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
2496 					stackshot_thread_turnstileinfo(
2497 						thread,
2498 						&thread_turnstileinfo[current_turnstileinfo_index++]);
2499 				}
2500 			}
2501 		}
2502 
2503 #if STACKSHOT_COLLECTS_LATENCY_INFO
2504 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
2505 		latency_info.misc2_latency = mach_absolute_time();
2506 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2507 
2508 #if DEBUG || DEVELOPMENT
2509 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
2510 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
2511 			    num_delta_thread_snapshots, current_delta_snapshot_index);
2512 		}
2513 		if (current_waitinfo_index != num_waitinfo_threads) {
2514 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
2515 			    num_waitinfo_threads, current_waitinfo_index);
2516 		}
2517 #endif
2518 
2519 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
2520 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2521 		}
2522 
2523 #if IMPORTANCE_INHERITANCE
2524 		if (save_donating_pids_p) {
2525 			kcd_exit_on_error(
2526 				((((mach_vm_address_t)kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t))) <
2527 				(mach_vm_address_t)kcd_max_address(stackshot_kcdata_p))
2528 				? KERN_SUCCESS
2529 				: KERN_RESOURCE_SHORTAGE));
2530 			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
2531 			    (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
2532 			if (saved_count > 0) {
2533 				/* Variable size array - better not have it on the stack. */
2534 				kcdata_compression_window_open(stackshot_kcdata_p);
2535 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
2536 				    sizeof(int32_t), saved_count, &out_addr));
2537 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2538 			}
2539 		}
2540 #endif
2541 
2542 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2543 		if (!panic_stackshot) {
2544 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
2545 			    "task_cpu_cycle_count"));
2546 		}
2547 #endif
2548 
2549 #if STACKSHOT_COLLECTS_LATENCY_INFO
2550 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
2551 		if (collect_latency_info) {
2552 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
2553 		}
2554 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2555 
2556 		/* mark end of task snapshot data */
2557 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
2558 		    task_uniqueid));
2559 	}
2560 
2561 
2562 error_exit:
2563 	return error;
2564 }
2565 
2566 
2567 static kern_return_t
kdp_stackshot_kcdata_format(int pid,uint64_t trace_flags,uint32_t * pBytesTraced,uint32_t * pBytesUncompressed)2568 kdp_stackshot_kcdata_format(int pid, uint64_t trace_flags, uint32_t * pBytesTraced, uint32_t * pBytesUncompressed)
2569 {
2570 	kern_return_t error        = KERN_SUCCESS;
2571 	mach_vm_address_t out_addr = 0;
2572 	uint64_t abs_time = 0, abs_time_end = 0;
2573 	uint64_t system_state_flags = 0;
2574 	task_t task = TASK_NULL;
2575 	mach_timebase_info_data_t timebase = {0, 0};
2576 	uint32_t length_to_copy = 0, tmp32 = 0;
2577 	abs_time = mach_absolute_time();
2578 	uint64_t last_task_start_time = 0;
2579 
2580 #if STACKSHOT_COLLECTS_LATENCY_INFO
2581 	struct stackshot_latency_collection latency_info;
2582 #endif
2583 
2584 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2585 	uint64_t stackshot_begin_cpu_cycle_count = 0;
2586 
2587 	if (!panic_stackshot) {
2588 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2589 	}
2590 #endif
2591 
2592 #if STACKSHOT_COLLECTS_LATENCY_INFO
2593 	collect_latency_info = trace_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
2594 #endif
2595 
2596 	/* process the flags */
2597 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2598 	boolean_t use_fault_path          = ((trace_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
2599 	stack_enable_faulting = (trace_flags & (STACKSHOT_ENABLE_BT_FAULTING));
2600 
2601 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
2602 	kc_format_t primary_kc_type = KCFormatUnknown;
2603 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
2604 		trace_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
2605 	}
2606 
2607 	struct stackshot_context ctx = {};
2608 	ctx.trace_flags = trace_flags;
2609 	ctx.pid = pid;
2610 
2611 	if (use_fault_path) {
2612 		fault_stats.sfs_pages_faulted_in = 0;
2613 		fault_stats.sfs_time_spent_faulting = 0;
2614 		fault_stats.sfs_stopped_faulting = (uint8_t) FALSE;
2615 	}
2616 
2617 	if (sizeof(void *) == 8) {
2618 		system_state_flags |= kKernel64_p;
2619 	}
2620 
2621 	if (stackshot_kcdata_p == NULL || pBytesTraced == NULL) {
2622 		error = KERN_INVALID_ARGUMENT;
2623 		goto error_exit;
2624 	}
2625 
2626 	_stackshot_validation_reset();
2627 
2628 	/* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
2629 	clock_timebase_info(&timebase);
2630 
2631 	/* begin saving data into the buffer */
2632 	*pBytesTraced = 0;
2633 	if (pBytesUncompressed) {
2634 		*pBytesUncompressed = 0;
2635 	}
2636 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
2637 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
2638 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
2639 	if (trace_flags & STACKSHOT_PAGE_TABLES) {
2640 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stack_snapshot_pagetable_mask, "stackshot_pagetable_mask"));
2641 	}
2642 	if (stackshot_initial_estimate != 0) {
2643 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
2644 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
2645 	}
2646 
2647 #if STACKSHOT_COLLECTS_LATENCY_INFO
2648 	latency_info.setup_latency = mach_absolute_time();
2649 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2650 
2651 #if CONFIG_JETSAM
2652 	tmp32 = memorystatus_get_pressure_status_kdp();
2653 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
2654 #endif
2655 
2656 	if (!collect_delta_stackshot) {
2657 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
2658 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
2659 
2660 		tmp32 = PAGE_SIZE;
2661 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
2662 
2663 		/* save boot-args and osversion string */
2664 		length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
2665 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
2666 
2667 
2668 		length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
2669 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
2670 
2671 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
2672 	} else {
2673 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stack_snapshot_delta_since_timestamp));
2674 	}
2675 
2676 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
2677 
2678 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_microsecs));
2679 
2680 	/* record system level shared cache load info (if available) */
2681 	if (!collect_delta_stackshot && primary_system_shared_region &&
2682 	    _stackshot_validate_kva((vm_offset_t)primary_system_shared_region, sizeof(struct vm_shared_region))) {
2683 		struct dyld_shared_cache_loadinfo sys_shared_cache_info = {0};
2684 
2685 		/*
2686 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
2687 		 * naming of both the structure and fields for this use isn't great.  The
2688 		 * dyld_shared_cache_loadinfo structure has better names, but the same
2689 		 * layout and content as the original.
2690 		 *
2691 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
2692 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
2693 		 * entries; here, it's the slid base address, and we leave it that way
2694 		 * for backwards compatibility.
2695 		 */
2696 		stackshot_memcpy(sys_shared_cache_info.sharedCacheUUID, &primary_system_shared_region->sr_uuid, sizeof(primary_system_shared_region->sr_uuid));
2697 		sys_shared_cache_info.sharedCacheSlide =
2698 		    primary_system_shared_region->sr_slide;
2699 		sys_shared_cache_info.sharedCacheUnreliableSlidBaseAddress =
2700 		    primary_system_shared_region->sr_slide + primary_system_shared_region->sr_base_address;
2701 		sys_shared_cache_info.sharedCacheSlidFirstMapping =
2702 		    primary_system_shared_region->sr_base_address + primary_system_shared_region->sr_first_mapping;
2703 
2704 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
2705 		    sizeof(sys_shared_cache_info), &sys_shared_cache_info));
2706 
2707 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
2708 			/*
2709 			 * Include a map of the system shared cache layout if it has been populated
2710 			 * (which is only when the system is using a custom shared cache).
2711 			 */
2712 			if (primary_system_shared_region->sr_images && _stackshot_validate_kva((vm_offset_t)primary_system_shared_region->sr_images,
2713 			    (primary_system_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
2714 				assert(primary_system_shared_region->sr_images_count != 0);
2715 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), primary_system_shared_region->sr_images_count, primary_system_shared_region->sr_images));
2716 			}
2717 		}
2718 	}
2719 
2720 	/* Add requested information first */
2721 	if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
2722 		struct mem_and_io_snapshot mais = {0};
2723 		kdp_mem_and_io_snapshot(&mais);
2724 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
2725 	}
2726 
2727 #if CONFIG_THREAD_GROUPS
2728 	struct thread_group_snapshot_v2 *thread_groups = NULL;
2729 	int num_thread_groups = 0;
2730 
2731 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2732 	uint64_t thread_group_begin_cpu_cycle_count = 0;
2733 
2734 	if (!panic_stackshot && (trace_flags & STACKSHOT_THREAD_GROUP)) {
2735 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2736 	}
2737 #endif
2738 
2739 
2740 	/* Iterate over thread group names */
2741 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
2742 		/* Variable size array - better not have it on the stack. */
2743 		kcdata_compression_window_open(stackshot_kcdata_p);
2744 
2745 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
2746 			trace_flags &= ~(STACKSHOT_THREAD_GROUP);
2747 		}
2748 
2749 		if (num_thread_groups > 0) {
2750 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v2), num_thread_groups, &out_addr));
2751 			thread_groups = (struct thread_group_snapshot_v2 *)out_addr;
2752 		}
2753 
2754 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
2755 			error = KERN_FAILURE;
2756 			goto error_exit;
2757 		}
2758 
2759 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2760 	}
2761 
2762 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2763 	if (!panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
2764 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
2765 		    "thread_groups_cpu_cycle_count"));
2766 	}
2767 #endif
2768 #else
2769 	trace_flags &= ~(STACKSHOT_THREAD_GROUP);
2770 #endif /* CONFIG_THREAD_GROUPS */
2771 
2772 
2773 #if STACKSHOT_COLLECTS_LATENCY_INFO
2774 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
2775 	latency_info.total_task_iteration_latency = mach_absolute_time();
2776 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2777 
2778 	/* Iterate over tasks */
2779 	queue_iterate(&tasks, task, task_t, tasks)
2780 	{
2781 		if (collect_delta_stackshot) {
2782 			uint64_t abstime;
2783 			proc_starttime_kdp(task->bsd_info, NULL, NULL, &abstime);
2784 
2785 			if (abstime > last_task_start_time) {
2786 				last_task_start_time = abstime;
2787 			}
2788 		}
2789 
2790 		error = kdp_stackshot_record_task(&ctx, task);
2791 		if (error) {
2792 			goto error_exit;
2793 		}
2794 	}
2795 
2796 
2797 #if STACKSHOT_COLLECTS_LATENCY_INFO
2798 	latency_info.total_task_iteration_latency = mach_absolute_time() - latency_info.total_task_iteration_latency;
2799 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2800 
2801 #if CONFIG_COALITIONS
2802 	/* Don't collect jetsam coalition data in delta stakshots - these don't change */
2803 	if (!collect_delta_stackshot || (last_task_start_time > stack_snapshot_delta_since_timestamp)) {
2804 		int num_coalitions = 0;
2805 		struct jetsam_coalition_snapshot *coalitions = NULL;
2806 
2807 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2808 		uint64_t coalition_begin_cpu_cycle_count = 0;
2809 
2810 		if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
2811 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2812 		}
2813 #endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
2814 
2815 		/* Iterate over coalitions */
2816 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
2817 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
2818 				trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
2819 			}
2820 		}
2821 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
2822 			if (num_coalitions > 0) {
2823 				/* Variable size array - better not have it on the stack. */
2824 				kcdata_compression_window_open(stackshot_kcdata_p);
2825 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
2826 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
2827 
2828 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
2829 					error = KERN_FAILURE;
2830 					goto error_exit;
2831 				}
2832 
2833 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2834 			}
2835 		}
2836 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2837 		if (!panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
2838 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
2839 			    "coalitions_cpu_cycle_count"));
2840 		}
2841 #endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
2842 	}
2843 #else
2844 	trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
2845 #endif /* CONFIG_COALITIONS */
2846 
2847 #if STACKSHOT_COLLECTS_LATENCY_INFO
2848 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time();
2849 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2850 
2851 	/*
2852 	 * Iterate over the tasks in the terminated tasks list. We only inspect
2853 	 * tasks that have a valid bsd_info pointer. The check for task transition
2854 	 * like past P_LPEXIT during proc_exit() is now checked for inside the
2855 	 * kdp_stackshot_record_task(), and then a safer and minimal
2856 	 * transitioning_task_snapshot struct is collected via
2857 	 * kcdata_record_transitioning_task_snapshot()
2858 	 */
2859 	queue_iterate(&terminated_tasks, task, task_t, tasks)
2860 	{
2861 		error = kdp_stackshot_record_task(&ctx, task);
2862 		if (error) {
2863 			goto error_exit;
2864 		}
2865 	}
2866 
2867 #if STACKSHOT_COLLECTS_LATENCY_INFO
2868 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time() - latency_info.total_terminated_task_iteration_latency;
2869 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2870 
2871 	if (use_fault_path) {
2872 		kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
2873 		    sizeof(struct stackshot_fault_stats), &fault_stats);
2874 	}
2875 
2876 #if STACKSHOT_COLLECTS_LATENCY_INFO
2877 	if (collect_latency_info) {
2878 		latency_info.latency_version = 1;
2879 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO, sizeof(latency_info), &latency_info));
2880 	}
2881 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2882 
2883 	/* update timestamp of the stackshot */
2884 	abs_time_end = mach_absolute_time();
2885 	struct stackshot_duration_v2 stackshot_duration = {
2886 		.stackshot_duration         = (abs_time_end - abs_time),
2887 		.stackshot_duration_outer   = 0,
2888 		.stackshot_duration_prior   = stackshot_duration_prior_abs,
2889 	};
2890 
2891 	if ((trace_flags & STACKSHOT_DO_COMPRESS) == 0) {
2892 		kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
2893 		    sizeof(struct stackshot_duration_v2), &out_addr));
2894 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
2895 		stackshot_memcpy(duration_p, &stackshot_duration, sizeof(*duration_p));
2896 		stackshot_duration_outer                   = (unaligned_u64 *)&duration_p->stackshot_duration_outer;
2897 	} else {
2898 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_duration), &stackshot_duration));
2899 		stackshot_duration_outer = NULL;
2900 	}
2901 
2902 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2903 	if (!panic_stackshot) {
2904 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
2905 		    "stackshot_total_cpu_cycle_cnt"));
2906 	}
2907 #endif
2908 
2909 	kcd_finalize_compression(stackshot_kcdata_p);
2910 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_out_flags"));
2911 
2912 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_kcdata_p));
2913 
2914 	/*  === END of populating stackshot data === */
2915 
2916 	*pBytesTraced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_kcdata_p);
2917 	*pBytesUncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_kcdata_p);
2918 
2919 error_exit:;
2920 
2921 #if INTERRUPT_MASKED_DEBUG
2922 	bool disable_interrupts_masked_check = kern_feature_override(
2923 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
2924 	    (trace_flags & STACKSHOT_DO_COMPRESS) != 0;
2925 
2926 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
2927 	disable_interrupts_masked_check = true;
2928 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
2929 
2930 	if (disable_interrupts_masked_check) {
2931 		ml_spin_debug_clear_self();
2932 	}
2933 
2934 	if (!panic_stackshot && interrupt_masked_debug_mode) {
2935 		/*
2936 		 * Try to catch instances where stackshot takes too long BEFORE returning from
2937 		 * the debugger
2938 		 */
2939 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
2940 	}
2941 #endif /* INTERRUPT_MASKED_DEBUG */
2942 
2943 	stack_enable_faulting = FALSE;
2944 
2945 	return error;
2946 }
2947 
2948 static uint64_t
proc_was_throttled_from_task(task_t task)2949 proc_was_throttled_from_task(task_t task)
2950 {
2951 	uint64_t was_throttled = 0;
2952 
2953 	if (task->bsd_info) {
2954 		was_throttled = proc_was_throttled(task->bsd_info);
2955 	}
2956 
2957 	return was_throttled;
2958 }
2959 
2960 static uint64_t
proc_did_throttle_from_task(task_t task)2961 proc_did_throttle_from_task(task_t task)
2962 {
2963 	uint64_t did_throttle = 0;
2964 
2965 	if (task->bsd_info) {
2966 		did_throttle = proc_did_throttle(task->bsd_info);
2967 	}
2968 
2969 	return did_throttle;
2970 }
2971 
2972 static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot * memio_snap)2973 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
2974 {
2975 	unsigned int pages_reclaimed;
2976 	unsigned int pages_wanted;
2977 	kern_return_t kErr;
2978 
2979 	uint64_t compressions = 0;
2980 	uint64_t decompressions = 0;
2981 
2982 	compressions = counter_load(&vm_statistics_compressions);
2983 	decompressions = counter_load(&vm_statistics_decompressions);
2984 
2985 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
2986 	memio_snap->free_pages = vm_page_free_count;
2987 	memio_snap->active_pages = vm_page_active_count;
2988 	memio_snap->inactive_pages = vm_page_inactive_count;
2989 	memio_snap->purgeable_pages = vm_page_purgeable_count;
2990 	memio_snap->wired_pages = vm_page_wire_count;
2991 	memio_snap->speculative_pages = vm_page_speculative_count;
2992 	memio_snap->throttled_pages = vm_page_throttled_count;
2993 	memio_snap->busy_buffer_count = count_busy_buffers();
2994 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
2995 	memio_snap->compressions = (uint32_t)compressions;
2996 	memio_snap->decompressions = (uint32_t)decompressions;
2997 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
2998 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
2999 
3000 	if (!kErr) {
3001 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
3002 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
3003 		memio_snap->pages_wanted_reclaimed_valid = 1;
3004 	} else {
3005 		memio_snap->pages_wanted = 0;
3006 		memio_snap->pages_reclaimed = 0;
3007 		memio_snap->pages_wanted_reclaimed_valid = 0;
3008 	}
3009 }
3010 
/*
 * memcpy() replacement for stackshot output.
 *
 * On arm/arm64, while panic_stackshot is set, the copy is performed one
 * byte at a time instead of calling memcpy(). In every other case this is
 * a plain memcpy(dst, src, len).
 */
void
stackshot_memcpy(void *dst, const void *src, size_t len)
{
#if defined(__arm__) || defined(__arm64__)
	if (panic_stackshot) {
		uint8_t *out = (uint8_t *)dst;
		const uint8_t *in = (const uint8_t *)src;
		size_t copied;

		for (copied = 0; copied < len; copied++) {
			out[copied] = in[copied];
		}
		return;
	}
#endif
	memcpy(dst, src, len);
}
3025 
/*
 * strlcpy()-style copy that moves its bytes with stackshot_memcpy().
 *
 * Copies src into dst, writing at most maxlen bytes including the
 * terminating NUL. Nothing is written when maxlen is 0. Always returns
 * strlen(src), so a return value >= maxlen means the copy was truncated.
 */
static size_t
_stackshot_strlcpy(char *dst, const char *src, size_t maxlen)
{
	const size_t srclen = strlen(src);

	if (maxlen == 0) {
		return srclen;
	}

	if (srclen >= maxlen) {
		/* truncate: keep maxlen - 1 bytes and terminate by hand */
		stackshot_memcpy(dst, src, maxlen - 1);
		dst[maxlen - 1] = '\0';
	} else {
		/* fits: copy the string together with its terminator */
		stackshot_memcpy(dst, src, srclen + 1);
	}

	return srclen;
}
3040 
3041 /*
3042  * Sets the appropriate page mask and size to use for dealing with pages --
3043  * it's important that this is a "min" of page size to account for both K16/U4
3044  * (Rosetta) and K4/U16 (armv7k) environments.
3045  */
3046 static inline size_t
_stackshot_get_page_size(vm_map_t map,size_t * effective_page_mask)3047 _stackshot_get_page_size(vm_map_t map, size_t *effective_page_mask)
3048 {
3049 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3050 		*effective_page_mask = VM_MAP_PAGE_MASK(map);
3051 		return VM_MAP_PAGE_SIZE(map);
3052 	} else {
3053 		*effective_page_mask = PAGE_MASK;
3054 		return PAGE_SIZE;
3055 	}
3056 }
3057 
3058 /*
3059  * Returns the physical address of the specified map:target address,
3060  * using the kdp fault path if requested and the page is not resident.
3061  */
3062 vm_offset_t
kdp_find_phys(vm_map_t map,vm_offset_t target_addr,boolean_t try_fault,uint32_t * kdp_fault_results)3063 kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results)
3064 {
3065 	vm_offset_t cur_phys_addr;
3066 
3067 	if (map == VM_MAP_NULL) {
3068 		return 0;
3069 	}
3070 
3071 	cur_phys_addr = kdp_vtophys(map->pmap, target_addr);
3072 	if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
3073 		if (!try_fault || fault_stats.sfs_stopped_faulting) {
3074 			if (kdp_fault_results) {
3075 				*kdp_fault_results |= KDP_FAULT_RESULT_PAGED_OUT;
3076 			}
3077 
3078 			return 0;
3079 		}
3080 
3081 		/*
3082 		 * The pmap doesn't have a valid page so we start at the top level
3083 		 * vm map and try a lightweight fault. Update fault path usage stats.
3084 		 */
3085 		uint64_t fault_start_time = mach_absolute_time();
3086 		size_t effective_page_mask;
3087 		(void)_stackshot_get_page_size(map, &effective_page_mask);
3088 
3089 		cur_phys_addr = kdp_lightweight_fault(map, (target_addr & ~effective_page_mask));
3090 		fault_stats.sfs_time_spent_faulting += (mach_absolute_time() - fault_start_time);
3091 
3092 		if ((fault_stats.sfs_time_spent_faulting >= fault_stats.sfs_system_max_fault_time) && !panic_stackshot) {
3093 			fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
3094 		}
3095 
3096 		cur_phys_addr += (target_addr & effective_page_mask);
3097 
3098 		if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
3099 			if (kdp_fault_results) {
3100 				*kdp_fault_results |= (KDP_FAULT_RESULT_TRIED_FAULT | KDP_FAULT_RESULT_PAGED_OUT);
3101 			}
3102 
3103 			return 0;
3104 		}
3105 
3106 		if (kdp_fault_results) {
3107 			*kdp_fault_results |= KDP_FAULT_RESULT_FAULTED_IN;
3108 		}
3109 
3110 		fault_stats.sfs_pages_faulted_in++;
3111 	} else {
3112 		/*
3113 		 * This check is done in kdp_lightweight_fault for the fault path.
3114 		 */
3115 		unsigned int cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
3116 
3117 		if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
3118 			return 0;
3119 		}
3120 	}
3121 
3122 	return cur_phys_addr;
3123 }
3124 
3125 boolean_t
kdp_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,uint32_t * kdp_fault_results)3126 kdp_copyin_word(
3127 	task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results)
3128 {
3129 	if (task_has_64Bit_addr(task)) {
3130 		return kdp_copyin(task->map, addr, result, sizeof(uint64_t), try_fault, kdp_fault_results);
3131 	} else {
3132 		uint32_t buf;
3133 		boolean_t r = kdp_copyin(task->map, addr, &buf, sizeof(uint32_t), try_fault, kdp_fault_results);
3134 		*result = buf;
3135 		return r;
3136 	}
3137 }
3138 
3139 static int
kdp_copyin_string_slowpath(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,uint32_t * kdp_fault_results)3140 kdp_copyin_string_slowpath(
3141 	task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results)
3142 {
3143 	int i;
3144 	uint64_t validated = 0, valid_from;
3145 	uint64_t phys_src, phys_dest;
3146 	vm_map_t map = task->map;
3147 	size_t effective_page_mask;
3148 	size_t effective_page_size = _stackshot_get_page_size(map, &effective_page_mask);
3149 
3150 	for (i = 0; i < buf_sz; i++) {
3151 		if (validated == 0) {
3152 			valid_from = i;
3153 			phys_src = kdp_find_phys(map, addr + i, try_fault, kdp_fault_results);
3154 			phys_dest = kvtophys((vm_offset_t)&buf[i]);
3155 			uint64_t src_rem = effective_page_size - (phys_src & effective_page_mask);
3156 			uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
3157 			if (phys_src && phys_dest) {
3158 				validated = MIN(src_rem, dst_rem);
3159 				if (validated) {
3160 					bcopy_phys(phys_src, phys_dest, 1);
3161 					validated--;
3162 				} else {
3163 					return 0;
3164 				}
3165 			} else {
3166 				return 0;
3167 			}
3168 		} else {
3169 			bcopy_phys(phys_src + (i - valid_from), phys_dest + (i - valid_from), 1);
3170 			validated--;
3171 		}
3172 
3173 		if (buf[i] == '\0') {
3174 			return i + 1;
3175 		}
3176 	}
3177 
3178 	/* ran out of space */
3179 	return -1;
3180 }
3181 
3182 int
kdp_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,uint32_t * kdp_fault_results)3183 kdp_copyin_string(
3184 	task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results)
3185 {
3186 	/* try to opportunistically copyin 32 bytes, most strings should fit */
3187 	char optbuffer[32];
3188 	boolean_t res;
3189 
3190 	bzero(optbuffer, sizeof(optbuffer));
3191 	res = kdp_copyin(task->map, addr, optbuffer, sizeof(optbuffer), try_fault, kdp_fault_results);
3192 	if (res == FALSE || strnlen(optbuffer, sizeof(optbuffer)) == sizeof(optbuffer)) {
3193 		/* try the slowpath */
3194 		return kdp_copyin_string_slowpath(task, addr, buf, buf_sz, try_fault, kdp_fault_results);
3195 	}
3196 
3197 	/* success */
3198 	return (int) strlcpy(buf, optbuffer, buf_sz) + 1;
3199 }
3200 
3201 boolean_t
kdp_copyin(vm_map_t map,uint64_t uaddr,void * dest,size_t size,boolean_t try_fault,uint32_t * kdp_fault_results)3202 kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_results)
3203 {
3204 	size_t rem = size;
3205 	char *kvaddr = dest;
3206 	size_t effective_page_mask;
3207 	size_t effective_page_size = _stackshot_get_page_size(map, &effective_page_mask);
3208 
3209 #if defined(__arm__) || defined(__arm64__)
3210 	/* Identify if destination buffer is in panic storage area */
3211 	if (panic_stackshot && ((vm_offset_t)dest >= gPanicBase) && ((vm_offset_t)dest < (gPanicBase + gPanicSize))) {
3212 		if (((vm_offset_t)dest + size) > (gPanicBase + gPanicSize)) {
3213 			return FALSE;
3214 		}
3215 	}
3216 #endif
3217 
3218 	while (rem) {
3219 		uint64_t phys_src = kdp_find_phys(map, uaddr, try_fault, kdp_fault_results);
3220 		uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
3221 		uint64_t src_rem = effective_page_size - (phys_src & effective_page_mask);
3222 		uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
3223 		size_t cur_size = (uint32_t) MIN(src_rem, dst_rem);
3224 		cur_size = MIN(cur_size, rem);
3225 
3226 		if (phys_src && phys_dest) {
3227 #if defined(__arm__) || defined(__arm64__)
3228 			/*
3229 			 * On arm devices the panic buffer is mapped as device memory and doesn't allow
3230 			 * unaligned accesses. To prevent these, we copy over bytes individually here.
3231 			 */
3232 			if (panic_stackshot) {
3233 				stackshot_memcpy(kvaddr, (const void *)phystokv(phys_src), cur_size);
3234 			} else
3235 #endif /* defined(__arm__) || defined(__arm64__) */
3236 			bcopy_phys(phys_src, phys_dest, cur_size);
3237 		} else {
3238 			break;
3239 		}
3240 
3241 		uaddr += cur_size;
3242 		kvaddr += cur_size;
3243 		rem -= cur_size;
3244 	}
3245 
3246 	return rem == 0;
3247 }
3248 
3249 kern_return_t
do_stackshot(void * context)3250 do_stackshot(void *context)
3251 {
3252 #pragma unused(context)
3253 	kdp_snapshot++;
3254 
3255 	stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid,
3256 	    stack_snapshot_flags,
3257 	    &stack_snapshot_bytes_traced,
3258 	    &stack_snapshot_bytes_uncompressed);
3259 
3260 	if (stack_snapshot_ret == KERN_SUCCESS && stack_snapshot_flags & STACKSHOT_DO_COMPRESS) {
3261 		kcdata_finish_compression(stackshot_kcdata_p);
3262 	}
3263 
3264 	kdp_snapshot--;
3265 	return stack_snapshot_ret;
3266 }
3267 
3268 boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)3269 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
3270 {
3271 	/* When the pthread kext puts a worker thread to sleep, it will
3272 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
3273 	 * struct. See parkit() in kern/kern_support.c in libpthread.
3274 	 */
3275 	return (thread->state & TH_WAIT) &&
3276 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
3277 }
3278 
3279 #if CONFIG_COALITIONS
3280 static void
stackshot_coalition_jetsam_count(void * arg,int i,coalition_t coal)3281 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
3282 {
3283 #pragma unused(i, coal)
3284 	unsigned int *coalition_count = (unsigned int*)arg;
3285 	(*coalition_count)++;
3286 }
3287 
3288 static void
stackshot_coalition_jetsam_snapshot(void * arg,int i,coalition_t coal)3289 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
3290 {
3291 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
3292 		return;
3293 	}
3294 
3295 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
3296 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
3297 	task_t leader = TASK_NULL;
3298 	jcs->jcs_id = coalition_id(coal);
3299 	jcs->jcs_flags = 0;
3300 	jcs->jcs_thread_group = 0;
3301 
3302 	if (coalition_term_requested(coal)) {
3303 		jcs->jcs_flags |= kCoalitionTermRequested;
3304 	}
3305 	if (coalition_is_terminated(coal)) {
3306 		jcs->jcs_flags |= kCoalitionTerminated;
3307 	}
3308 	if (coalition_is_reaped(coal)) {
3309 		jcs->jcs_flags |= kCoalitionReaped;
3310 	}
3311 	if (coalition_is_privileged(coal)) {
3312 		jcs->jcs_flags |= kCoalitionPrivileged;
3313 	}
3314 
3315 #if CONFIG_THREAD_GROUPS
3316 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
3317 	if (thread_group) {
3318 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
3319 	}
3320 #endif /* CONFIG_THREAD_GROUPS */
3321 
3322 	leader = kdp_coalition_get_leader(coal);
3323 	if (leader) {
3324 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
3325 	} else {
3326 		jcs->jcs_leader_task_uniqueid = 0;
3327 	}
3328 }
3329 #endif /* CONFIG_COALITIONS */
3330 
3331 #if CONFIG_THREAD_GROUPS
/*
 * Counting callback: increments the unsigned int that `arg` points to by
 * one per invocation. The index and thread group arguments are ignored.
 */
static void
stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
{
#pragma unused(i, tg)
	unsigned int *counter = arg;

	*counter += 1;
}
3339 
3340 static void
stackshot_thread_group_snapshot(void * arg,int i,struct thread_group * tg)3341 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
3342 {
3343 	struct thread_group_snapshot_v2 *thread_groups = (struct thread_group_snapshot_v2 *)arg;
3344 	struct thread_group_snapshot_v2 *tgs = &thread_groups[i];
3345 	uint32_t flags = thread_group_get_flags(tg);
3346 	tgs->tgs_id = thread_group_get_id(tg);
3347 	stackshot_memcpy(tgs->tgs_name, thread_group_get_name(tg), THREAD_GROUP_MAXNAME);
3348 	tgs->tgs_flags = ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
3349 	    ((flags & THREAD_GROUP_FLAGS_UI_APP) ? kThreadGroupUIApp : 0);
3350 }
3351 #endif /* CONFIG_THREAD_GROUPS */
3352 
3353 /* Determine if a thread has waitinfo that stackshot can provide */
3354 static int
stackshot_thread_has_valid_waitinfo(thread_t thread)3355 stackshot_thread_has_valid_waitinfo(thread_t thread)
3356 {
3357 	if (!(thread->state & TH_WAIT)) {
3358 		return 0;
3359 	}
3360 
3361 	switch (thread->block_hint) {
3362 	// If set to None or is a parked work queue, ignore it
3363 	case kThreadWaitParkedWorkQueue:
3364 	case kThreadWaitNone:
3365 		return 0;
3366 	// There is a short window where the pthread kext removes a thread
3367 	// from its ksyn wait queue before waking the thread up
3368 	case kThreadWaitPThreadMutex:
3369 	case kThreadWaitPThreadRWLockRead:
3370 	case kThreadWaitPThreadRWLockWrite:
3371 	case kThreadWaitPThreadCondVar:
3372 		return kdp_pthread_get_thread_kwq(thread) != NULL;
3373 	// All other cases are valid block hints if in a wait state
3374 	default:
3375 		return 1;
3376 	}
3377 }
3378 
3379 /* Determine if a thread has turnstileinfo that stackshot can provide */
3380 static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)3381 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
3382 {
3383 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
3384 
3385 	return stackshot_thread_has_valid_waitinfo(thread) &&
3386 	       ts != TURNSTILE_NULL;
3387 }
3388 
3389 static void
stackshot_thread_turnstileinfo(thread_t thread,thread_turnstileinfo_t * tsinfo)3390 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_t *tsinfo)
3391 {
3392 	struct turnstile *ts;
3393 
3394 	/* acquire turnstile information and store it in the stackshot */
3395 	ts = thread_get_waiting_turnstile(thread);
3396 	tsinfo->waiter = thread_tid(thread);
3397 	kdp_turnstile_fill_tsinfo(ts, tsinfo);
3398 }
3399 
3400 static void
stackshot_thread_wait_owner_info(thread_t thread,thread_waitinfo_t * waitinfo)3401 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_t *waitinfo)
3402 {
3403 	waitinfo->waiter        = thread_tid(thread);
3404 	waitinfo->wait_type     = thread->block_hint;
3405 
3406 	switch (waitinfo->wait_type) {
3407 	case kThreadWaitKernelMutex:
3408 		kdp_lck_mtx_find_owner(thread->waitq, thread->wait_event, waitinfo);
3409 		break;
3410 	case kThreadWaitPortReceive:
3411 		kdp_mqueue_recv_find_owner(thread->waitq, thread->wait_event, waitinfo);
3412 		break;
3413 	case kThreadWaitPortSend:
3414 		kdp_mqueue_send_find_owner(thread->waitq, thread->wait_event, waitinfo);
3415 		break;
3416 	case kThreadWaitSemaphore:
3417 		kdp_sema_find_owner(thread->waitq, thread->wait_event, waitinfo);
3418 		break;
3419 	case kThreadWaitUserLock:
3420 		kdp_ulock_find_owner(thread->waitq, thread->wait_event, waitinfo);
3421 		break;
3422 	case kThreadWaitKernelRWLockRead:
3423 	case kThreadWaitKernelRWLockWrite:
3424 	case kThreadWaitKernelRWLockUpgrade:
3425 		kdp_rwlck_find_owner(thread->waitq, thread->wait_event, waitinfo);
3426 		break;
3427 	case kThreadWaitPThreadMutex:
3428 	case kThreadWaitPThreadRWLockRead:
3429 	case kThreadWaitPThreadRWLockWrite:
3430 	case kThreadWaitPThreadCondVar:
3431 		kdp_pthread_find_owner(thread, waitinfo);
3432 		break;
3433 	case kThreadWaitWorkloopSyncWait:
3434 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo);
3435 		break;
3436 	case kThreadWaitOnProcess:
3437 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo);
3438 		break;
3439 	case kThreadWaitSleepWithInheritor:
3440 		kdp_sleep_with_inheritor_find_owner(thread->waitq, thread->wait_event, waitinfo);
3441 		break;
3442 	case kThreadWaitEventlink:
3443 		kdp_eventlink_find_owner(thread->waitq, thread->wait_event, waitinfo);
3444 		break;
3445 	case kThreadWaitCompressor:
3446 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo);
3447 		break;
3448 	default:
3449 		waitinfo->owner = 0;
3450 		waitinfo->context = 0;
3451 		break;
3452 	}
3453 }
3454