xref: /xnu-8020.140.41/osfmk/kern/kern_stackshot.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/vm_param.h>
31 #include <mach/mach_vm.h>
32 #include <mach/clock_types.h>
33 #include <sys/errno.h>
34 #include <sys/stackshot.h>
35 #ifdef IMPORTANCE_INHERITANCE
36 #include <ipc/ipc_importance.h>
37 #endif
38 #include <sys/appleapiopts.h>
39 #include <kern/debug.h>
40 #include <kern/block_hint.h>
41 #include <uuid/uuid.h>
42 
43 #include <kdp/kdp_dyld.h>
44 #include <kdp/kdp_en_debugger.h>
45 
46 #include <libsa/types.h>
47 #include <libkern/version.h>
48 #include <libkern/section_keywords.h>
49 
50 #include <string.h> /* bcopy */
51 
52 #include <kern/backtrace.h>
53 #include <kern/coalition.h>
54 #include <kern/processor.h>
55 #include <kern/host_statistics.h>
56 #include <kern/counter.h>
57 #include <kern/thread.h>
58 #include <kern/thread_group.h>
59 #include <kern/task.h>
60 #include <kern/telemetry.h>
61 #include <kern/clock.h>
62 #include <kern/policy_internal.h>
63 #include <kern/socd_client.h>
64 #include <vm/vm_map.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_pageout.h>
67 #include <vm/vm_fault.h>
68 #include <vm/vm_shared_region.h>
69 #include <vm/vm_compressor.h>
70 #include <libkern/OSKextLibPrivate.h>
71 #include <os/log.h>
72 
73 #if defined(__x86_64__)
74 #include <i386/mp.h>
75 #include <i386/cpu_threads.h>
76 #endif
77 
78 #include <pexpert/pexpert.h>
79 
80 #if MONOTONIC
81 #include <kern/monotonic.h>
82 #endif /* MONOTONIC */
83 
84 #include <san/kasan.h>
85 
86 #if DEBUG || DEVELOPMENT
87 # define STACKSHOT_COLLECTS_LATENCY_INFO 1
88 #else
89 # define STACKSHOT_COLLECTS_LATENCY_INFO 0
90 #endif /* DEBUG || DEVELOPMENT */
91 
92 extern unsigned int not_in_kdp;
93 
94 /* indicate to the compiler that some accesses are unaligned */
95 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
96 
97 extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
98 
/*
 * Global state describing the stackshot request currently being serviced.
 * NOTE(review): the buf/bufsize/pid/flags/delta/pagetable_mask variables mirror
 * the parameters of kdp_snapshot_preflight() and are presumably loaded by it;
 * that function is not visible here -- confirm.
 */
99 int kdp_snapshot                            = 0;
/* Per the stackshot_trap() header comment, updated as a result of the trap. */
100 static kern_return_t stack_snapshot_ret     = 0;
/* Presumably reported by kdp_stack_snapshot_bytes_traced()/..._uncompressed() -- confirm. */
101 static uint32_t stack_snapshot_bytes_traced = 0;
102 static uint32_t stack_snapshot_bytes_uncompressed  = 0;
103 
104 #if STACKSHOT_COLLECTS_LATENCY_INFO
/* When true, get_stackshot_estsize() adds the latency structs to its per-thread/per-task sizes. */
105 static bool collect_latency_info = true;
106 #endif
/* Cleared to NULL on the exit path of stack_snapshot_from_kernel(). */
107 static kcdata_descriptor_t stackshot_kcdata_p = NULL;
108 static void *stack_snapshot_buf;
109 static uint32_t stack_snapshot_bufsize;
110 int stack_snapshot_pid;
111 static uint64_t stack_snapshot_flags;
112 static uint64_t stack_snapshot_delta_since_timestamp;
113 static uint32_t stack_snapshot_pagetable_mask;
114 static boolean_t panic_stackshot;
115 
116 static boolean_t stack_enable_faulting = FALSE;
/* stackshot_init() fills in fault_stats.sfs_system_max_fault_time. */
117 static struct stackshot_fault_stats fault_stats;
118 
119 /*
120  * Experimentally, our current estimates are 20% short 96% of the time; 40 gets
121  * us into 99.9%+ territory.  In the longer run, we need to make stackshot
122  * estimates use a better approach (rdar://78880038); this is intended to be a
123  * short-term fix.
124  */
/* Read with os_atomic_load() in kern_stack_snapshot_internal() and passed to get_stackshot_estsize() as `adj`. */
125 uint32_t stackshot_estimate_adj = 40; /* experiment factor: 0-100, adjust our estimate up by this amount */
126 
/* Reset to 0 by stack_snapshot_from_kernel(); set to the first size estimate by kern_stack_snapshot_internal(). */
127 static uint32_t stackshot_initial_estimate;
/* Snapshot of stackshot_estimate_adj taken when kern_stack_snapshot_internal() computes its estimate. */
128 static uint32_t stackshot_initial_estimate_adj;
/* Accumulates the time spent in attempts that failed with KERN_INSUFFICIENT_BUFFER_SIZE and were retried. */
129 static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
/*
 * Reset to NULL before each trap; if it is non-NULL after the trap, the caller
 * stores (time_end - time_start) through it. Declared as a pointer to
 * unaligned_u64, so the target need not be 8-byte aligned.
 */
130 static unaligned_u64 * stackshot_duration_outer;
131 static uint64_t stackshot_microsecs;
132 
133 void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
/* Number of bytes traced into kernel_stackshot_buf; 0 when no saved stackshot is present. */
134 int kernel_stackshot_buf_size = 0;
135 
136 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
137 
138 __private_extern__ void stackshot_init( void );
139 static boolean_t memory_iszero(void *addr, size_t size);
140 uint32_t                get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj);
141 kern_return_t           kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
142     size_t stackshot_config_size, boolean_t stackshot_from_user);
143 kern_return_t           do_stackshot(void *);
144 void                    kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags, kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask);
145 boolean_t               stackshot_thread_is_idle_worker_unsafe(thread_t thread);
146 static int              kdp_stackshot_kcdata_format(int pid, uint64_t trace_flags, uint32_t *pBytesTraced, uint32_t *pBytesUncompressed);
147 uint32_t                kdp_stack_snapshot_bytes_traced(void);
148 uint32_t                kdp_stack_snapshot_bytes_uncompressed(void);
149 static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
150 static boolean_t        kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
151 static int              kdp_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
152 static boolean_t        kdp_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
153 static uint64_t         proc_was_throttled_from_task(task_t task);
154 static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
155 static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
156 static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
157 static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);
158 
159 #if CONFIG_COALITIONS
160 static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
161 static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
162 #endif /* CONFIG_COALITIONS */
163 
164 #if CONFIG_THREAD_GROUPS
165 static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
166 static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
167 #endif /* CONFIG_THREAD_GROUPS */
168 
169 extern uint32_t         workqueue_get_pwq_state_kdp(void *proc);
170 
171 struct proc;
172 extern int              proc_pid(struct proc *p);
173 extern uint64_t         proc_uniqueid(void *p);
174 extern uint64_t         proc_was_throttled(void *p);
175 extern uint64_t         proc_did_throttle(void *p);
176 extern int              proc_exiting(void *p);
177 extern int              proc_in_teardown(void *p);
178 static uint64_t         proc_did_throttle_from_task(task_t task);
179 extern void             proc_name_kdp(struct proc *p, char * buf, int size);
180 extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
181 extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
182 extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
183 extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
184 extern int              memorystatus_get_pressure_status_kdp(void);
185 extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
186 
187 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
188 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
189 
190 #if CONFIG_TELEMETRY
191 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
192 #endif /* CONFIG_TELEMETRY */
193 
194 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
195 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
196 
197 static size_t _stackshot_get_page_size(vm_map_t, size_t *page_mask_out);
198 static size_t stackshot_plh_est_size(void);
199 
200 /*
201  * Validates that the given address for a word is both a valid page and has
202  * default caching attributes for the current map.
203  */
204 bool machine_trace_thread_validate_kva(vm_offset_t);
205 /*
206  * Validates a region that stackshot will potentially inspect.
207  */
208 static bool _stackshot_validate_kva(vm_offset_t, size_t);
209 /*
210  * Must be called whenever stackshot is re-driven.
211  */
212 static void _stackshot_validation_reset(void);
213 
214 #define KDP_FAULT_RESULT_PAGED_OUT   0x1 /* some data was unable to be retrieved */
215 #define KDP_FAULT_RESULT_TRIED_FAULT 0x2 /* tried to fault in data */
216 #define KDP_FAULT_RESULT_FAULTED_IN  0x4 /* successfully faulted in data */
217 
218 /*
219  * Looks up the physical translation for the given address in the target map, attempting
220  * to fault data in if requested and it is not resident. Populates thread_trace_flags if requested
221  * as well.
222  */
223 vm_offset_t kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results);
224 
225 static long _stackshot_strlen(const char *s, size_t maxlen);    /* -1 if no \0 before fault or maxlen */
226 static size_t _stackshot_strlcpy(char *dst, const char *src, size_t maxlen);
227 void stackshot_memcpy(void *dst, const void *src, size_t len);
228 
229 #define MAX_FRAMES 1000
230 #define MAX_LOADINFOS 500
231 #define TASK_IMP_WALK_LIMIT 20
232 
233 typedef struct thread_snapshot *thread_snapshot_t;
234 typedef struct task_snapshot *task_snapshot_t;
235 
236 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
237 extern kdp_send_t    kdp_en_send_pkt;
238 #endif
239 
240 /*
241  * Stackshot locking and other defines.
242  */
/* Statically declared lock group and mutex for the stackshot subsystem. */
243 static LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
244 static LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
245 
/* Thin wrappers around lck_mtx_lock/lck_mtx_try_lock/lck_mtx_unlock on stackshot_subsys_mutex. */
246 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
247 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
248 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
249 
/* 64 MiB and 8 MiB respectively. */
250 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
251 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
252 
/* stackshot_init() allows 1 MiB of trace buffer per GiB of max_mem (rounded up to a whole GiB). */
253 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
254 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
255 
/*
 * Upper bound on a stackshot buffer. Starts at SANE_TRACEBUF_SIZE; stackshot_init()
 * may raise it based on max_mem and passes it to PE_parse_boot_argn() under the
 * name "stackshot_maxsz".
 */
256 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
257 
258 /*
259  * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
260  * for non-panic stackshots where faulting is requested.
261  */
262 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
263 
/* Both are inputs to the size estimate computed by get_stackshot_estsize(). */
264 #define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
265 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
266 
/* Rounds x up to the next multiple of y using integer arithmetic; evaluates y more than once. */
267 #ifndef ROUNDUP
268 #define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
269 #endif
270 
271 #define STACKSHOT_QUEUE_LABEL_MAXSIZE  64
272 
273 /*
274  * Initialize the mutex governing access to the stack snapshot subsystem
275  * and other stackshot related bits.
276  */
277 __private_extern__ void
stackshot_init(void)278 stackshot_init( void )
279 {
280 	mach_timebase_info_data_t timebase;
281 
282 	clock_timebase_info(&timebase);
283 	fault_stats.sfs_system_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
284 
285 	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
286 
287 	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
288 }
289 
290 /*
291  * Method for grabbing timer values safely, in the sense that no infinite loop will occur
292  * Certain flavors of the timer_grab function, which would seem to be the thing to use,
293  * can loop infinitely if called while the timer is in the process of being updated.
294  * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of
295  * the timer using this method. This seems insoluble, since stackshot runs in a context
296  * where the timer might be half-updated, and has no way of yielding control just long
297  * enough to finish the update.
298  */
299 
300 static uint64_t
safe_grab_timer_value(struct timer * t)301 safe_grab_timer_value(struct timer *t)
302 {
303 #if   defined(__LP64__)
304 	return t->all_bits;
305 #else
306 	uint64_t time = t->high_bits; /* endian independent grab */
307 	time = (time << 32) | t->low_bits;
308 	return time;
309 #endif
310 }
311 
312 /*
313  * Called with interrupts disabled after stackshot context has been
314  * initialized. Updates stack_snapshot_ret.
315  */
316 static kern_return_t
stackshot_trap()317 stackshot_trap()
318 {
319 	kern_return_t   rv;
320 
321 #if defined(__x86_64__)
322 	/*
323 	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
324 	 * operation, it's essential to apply mutual exclusion to the other when one
325 	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
326 	 * capture CPUs.
327 	 *
328 	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
329 	 * allowed, so we check to ensure there there is no rendezvous in progress before
330 	 * trying to grab the lock (if there is, a deadlock will occur when we try to
331 	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
332 	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
333 	 * a subordinate of the call chain within the mp rendezvous action, this flag will
334 	 * be set and can be used to detect the inevitable deadlock that would occur
335 	 * if this thread tried to grab the rendezvous lock.
336 	 */
337 
338 	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
339 		panic("Calling stackshot from a rendezvous is not allowed!");
340 	}
341 
342 	mp_rendezvous_lock();
343 #endif
344 
345 	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0);
346 
347 #if defined(__x86_64__)
348 	mp_rendezvous_unlock();
349 #endif
350 	return rv;
351 }
352 
353 
354 kern_return_t
stack_snapshot_from_kernel(int pid,void * buf,uint32_t size,uint64_t flags,uint64_t delta_since_timestamp,uint32_t pagetable_mask,unsigned * bytes_traced)355 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
356 {
357 	kern_return_t error = KERN_SUCCESS;
358 	boolean_t istate;
359 
360 #if DEVELOPMENT || DEBUG
361 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
362 		error = KERN_NOT_SUPPORTED;
363 		goto out;
364 	}
365 #endif
366 	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
367 		return KERN_INVALID_ARGUMENT;
368 	}
369 
370 	/* cap in individual stackshot to max_tracebuf_size */
371 	if (size > max_tracebuf_size) {
372 		size = max_tracebuf_size;
373 	}
374 
375 	/* Serialize tracing */
376 	if (flags & STACKSHOT_TRYLOCK) {
377 		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
378 			return KERN_LOCK_OWNED;
379 		}
380 	} else {
381 		STACKSHOT_SUBSYS_LOCK();
382 	}
383 
384 	struct kcdata_descriptor kcdata;
385 	uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ?
386 	    KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;
387 
388 	error = kcdata_memory_static_init(&kcdata, (mach_vm_address_t)buf, hdr_tag, size,
389 	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
390 	if (error) {
391 		goto out;
392 	}
393 
394 	stackshot_initial_estimate = 0;
395 	stackshot_duration_prior_abs = 0;
396 	stackshot_duration_outer = NULL;
397 
398 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
399 	    flags, size, pid, delta_since_timestamp);
400 
401 	istate = ml_set_interrupts_enabled(FALSE);
402 	uint64_t time_start      = mach_absolute_time();
403 
404 	/* Emit a SOCD tracepoint that we are initiating a stackshot */
405 	SOCD_TRACE_XNU_START(STACKSHOT);
406 
407 	/* Preload trace parameters*/
408 	kdp_snapshot_preflight(pid, buf, size, flags, &kcdata,
409 	    delta_since_timestamp, pagetable_mask);
410 
411 	/*
412 	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
413 	 * the trace buffer
414 	 */
415 	error = stackshot_trap();
416 
417 	uint64_t time_end               = mach_absolute_time();
418 
419 	/* Emit a SOCD tracepoint that we have completed the stackshot */
420 	SOCD_TRACE_XNU_END(STACKSHOT);
421 
422 	ml_set_interrupts_enabled(istate);
423 
424 	if (stackshot_duration_outer) {
425 		*stackshot_duration_outer = time_end - time_start;
426 	}
427 	*bytes_traced = kdp_stack_snapshot_bytes_traced();
428 
429 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
430 	    error, (time_end - time_start), size, *bytes_traced);
431 out:
432 	stackshot_kcdata_p = NULL;
433 	STACKSHOT_SUBSYS_UNLOCK();
434 	return error;
435 }
436 
437 #if CONFIG_TELEMETRY
438 kern_return_t
stack_microstackshot(user_addr_t tracebuf,uint32_t tracebuf_size,uint32_t flags,int32_t * retval)439 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
440 {
441 	int error = KERN_SUCCESS;
442 	uint32_t bytes_traced = 0;
443 
444 	*retval = -1;
445 
446 	/*
447 	 * Control related operations
448 	 */
449 	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
450 		telemetry_global_ctl(1);
451 		*retval = 0;
452 		goto exit;
453 	} else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
454 		telemetry_global_ctl(0);
455 		*retval = 0;
456 		goto exit;
457 	}
458 
459 	/*
460 	 * Data related operations
461 	 */
462 	*retval = -1;
463 
464 	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
465 		error = KERN_INVALID_ARGUMENT;
466 		goto exit;
467 	}
468 
469 	STACKSHOT_SUBSYS_LOCK();
470 
471 	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
472 		if (tracebuf_size > max_tracebuf_size) {
473 			error = KERN_INVALID_ARGUMENT;
474 			goto unlock_exit;
475 		}
476 
477 		bytes_traced = tracebuf_size;
478 		error = telemetry_gather(tracebuf, &bytes_traced,
479 		    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
480 		*retval = (int)bytes_traced;
481 		goto unlock_exit;
482 	}
483 
484 unlock_exit:
485 	STACKSHOT_SUBSYS_UNLOCK();
486 exit:
487 	return error;
488 }
489 #endif /* CONFIG_TELEMETRY */
490 
491 /*
492  * Return the estimated size of a stackshot based on the
493  * number of currently running threads and tasks.
494  *
495  * adj is an adjustment in units of percentage
496  *
497  * This function is mostly unhinged from reality; struct thread_snapshot and
498  * struct task_stackshot are legacy, much larger versions of the structures we
499  * actually use, and there's no accounting for how we actually generate
500  * task & thread information.  rdar://78880038 intends to replace this all.
501  */
502 uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj)503 get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj)
504 {
505 	vm_size_t thread_total;
506 	vm_size_t task_total;
507 	uint64_t size;
508 	uint32_t estimated_size;
509 	size_t est_thread_size = sizeof(struct thread_snapshot);
510 	size_t est_task_size = sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE;
511 
512 	adj = MIN(adj, 100u);   /* no more than double our estimate */
513 
514 #if STACKSHOT_COLLECTS_LATENCY_INFO
515 	if (collect_latency_info) {
516 		est_thread_size += sizeof(struct stackshot_latency_thread);
517 		est_task_size += sizeof(struct stackshot_latency_task);
518 	}
519 #endif
520 
521 	thread_total = (threads_count * est_thread_size);
522 	task_total = (tasks_count  * est_task_size);
523 
524 	size = thread_total + task_total + STACKSHOT_SUPP_SIZE;                 /* estimate */
525 	size += (size * adj) / 100;                                                                     /* add adj */
526 	size = MAX(size, prev_size_hint);                                                               /* allow hint to increase */
527 	size += stackshot_plh_est_size(); /* add space for the port label hash */
528 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK));             /* avoid overflow */
529 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
530 
531 	return estimated_size;
532 }
533 
534 /*
535  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
536  *				into the current task's user space and subsequently copy out the address
537  *				at which the buffer has been mapped in user space to out_buffer_addr.
538  *
539  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
540  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
541  *				out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
542  *				out_size_addr - pointer to be filled in with the size of the buffer
543  *
544  * Outputs:			ENOSPC if there is not enough free space in the task's address space to remap the buffer
545  *				EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
546  *				an error from copyout
547  */
548 static kern_return_t
stackshot_remap_buffer(void * stackshotbuf,uint32_t bytes_traced,uint64_t out_buffer_addr,uint64_t out_size_addr)549 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
550 {
551 	int                     error = 0;
552 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
553 	vm_prot_t               cur_prot, max_prot;
554 
555 	error = mach_vm_remap_kernel(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
556 	    VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
557 	/*
558 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
559 	 */
560 	if (error == KERN_SUCCESS) {
561 		/*
562 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
563 		 * we just made in the task's user space.
564 		 */
565 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
566 		if (error != KERN_SUCCESS) {
567 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
568 			return error;
569 		}
570 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
571 		if (error != KERN_SUCCESS) {
572 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
573 			return error;
574 		}
575 	}
576 	return error;
577 }
578 
579 kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version,void * stackshot_config,size_t stackshot_config_size,boolean_t stackshot_from_user)580 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
581 {
582 	int error = 0;
583 	boolean_t prev_interrupt_state;
584 	uint32_t bytes_traced = 0;
585 	uint32_t stackshot_estimate = 0;
586 	uint32_t stackshotbuf_size = 0;
587 	void * stackshotbuf = NULL;
588 	kcdata_descriptor_t kcdata_p = NULL;
589 
590 	void * buf_to_free = NULL;
591 	int size_to_free = 0;
592 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
593 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
594 
595 	/* Parsed arguments */
596 	uint64_t                out_buffer_addr;
597 	uint64_t                out_size_addr;
598 	int                     pid = -1;
599 	uint64_t                flags;
600 	uint64_t                since_timestamp;
601 	uint32_t                size_hint = 0;
602 	uint32_t                pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
603 
604 	if (stackshot_config == NULL) {
605 		return KERN_INVALID_ARGUMENT;
606 	}
607 #if DEVELOPMENT || DEBUG
608 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
609 	 * configuration in lieu of the kernel feature override.
610 	 */
611 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
612 		return KERN_NOT_SUPPORTED;
613 	}
614 #endif
615 
616 	switch (stackshot_config_version) {
617 	case STACKSHOT_CONFIG_TYPE:
618 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
619 			return KERN_INVALID_ARGUMENT;
620 		}
621 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
622 		out_buffer_addr = config->sc_out_buffer_addr;
623 		out_size_addr = config->sc_out_size_addr;
624 		pid = config->sc_pid;
625 		flags = config->sc_flags;
626 		since_timestamp = config->sc_delta_timestamp;
627 		if (config->sc_size <= max_tracebuf_size) {
628 			size_hint = config->sc_size;
629 		}
630 		/*
631 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
632 		 * dump every level if the pagetable_mask is not set
633 		 */
634 		if (flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
635 			pagetable_mask = config->sc_pagetable_mask;
636 		}
637 		break;
638 	default:
639 		return KERN_NOT_SUPPORTED;
640 	}
641 
642 	/*
643 	 * Currently saving a kernel buffer and trylock are only supported from the
644 	 * internal/KEXT API.
645 	 */
646 	if (stackshot_from_user) {
647 		if (flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
648 			return KERN_NO_ACCESS;
649 		}
650 #if !DEVELOPMENT && !DEBUG
651 		if (flags & (STACKSHOT_DO_COMPRESS)) {
652 			return KERN_NO_ACCESS;
653 		}
654 #endif
655 	} else {
656 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
657 			return KERN_NOT_SUPPORTED;
658 		}
659 	}
660 
661 	if (!((flags & STACKSHOT_KCDATA_FORMAT) || (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
662 		return KERN_NOT_SUPPORTED;
663 	}
664 
665 	/* Compressed delta stackshots or page dumps are not yet supported */
666 	if (((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (flags & STACKSHOT_PAGE_TABLES))
667 	    && (flags & STACKSHOT_DO_COMPRESS)) {
668 		return KERN_NOT_SUPPORTED;
669 	}
670 
671 	/*
672 	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
673 	 */
674 	if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
675 		return KERN_INVALID_ARGUMENT;
676 	}
677 
678 	if (since_timestamp != 0 && ((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
679 		return KERN_INVALID_ARGUMENT;
680 	}
681 
682 #if MONOTONIC
683 	if (!mt_core_supported) {
684 		flags &= ~STACKSHOT_INSTRS_CYCLES;
685 	}
686 #else /* MONOTONIC */
687 	flags &= ~STACKSHOT_INSTRS_CYCLES;
688 #endif /* !MONOTONIC */
689 
690 	STACKSHOT_SUBSYS_LOCK();
691 
692 	if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
693 		/*
694 		 * Don't overwrite an existing stackshot
695 		 */
696 		if (kernel_stackshot_buf != NULL) {
697 			error = KERN_MEMORY_PRESENT;
698 			goto error_exit;
699 		}
700 	} else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
701 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
702 			error = KERN_NOT_IN_SET;
703 			goto error_exit;
704 		}
705 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
706 		    out_buffer_addr, out_size_addr);
707 		/*
708 		 * If we successfully remapped the buffer into the user's address space, we
709 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
710 		 * and then clear the kernel stackshot pointer and associated size.
711 		 */
712 		if (error == KERN_SUCCESS) {
713 			buf_to_free = kernel_stackshot_buf;
714 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
715 			kernel_stackshot_buf = NULL;
716 			kernel_stackshot_buf_size = 0;
717 		}
718 
719 		goto error_exit;
720 	}
721 
722 	if (flags & STACKSHOT_GET_BOOT_PROFILE) {
723 		void *bootprofile = NULL;
724 		uint32_t len = 0;
725 #if CONFIG_TELEMETRY
726 		bootprofile_get(&bootprofile, &len);
727 #endif
728 		if (!bootprofile || !len) {
729 			error = KERN_NOT_IN_SET;
730 			goto error_exit;
731 		}
732 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
733 		goto error_exit;
734 	}
735 
736 	stackshot_duration_prior_abs = 0;
737 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
738 	stackshotbuf_size = stackshot_estimate =
739 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj);
740 	stackshot_initial_estimate = stackshot_estimate;
741 
742 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
743 	    flags, stackshotbuf_size, pid, since_timestamp);
744 	is_traced = true;
745 
746 	for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
747 		if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size,
748 		    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
749 			error = KERN_RESOURCE_SHORTAGE;
750 			goto error_exit;
751 		}
752 
753 
754 		uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
755 		    : (flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
756 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
757 		kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, hdr_tag, stackshotbuf_size,
758 		    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
759 
760 		stackshot_duration_outer = NULL;
761 
762 		/* if compression was requested, allocate the extra zlib scratch area */
763 		if (flags & STACKSHOT_DO_COMPRESS) {
764 			hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
765 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
766 			error = kcdata_init_compress(kcdata_p, hdr_tag, stackshot_memcpy, KCDCT_ZLIB);
767 			if (error != KERN_SUCCESS) {
768 				os_log(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
769 				    (int) error);
770 				goto error_exit;
771 			}
772 		}
773 
774 		/*
775 		 * Disable interrupts and save the current interrupt state.
776 		 */
777 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
778 		uint64_t time_start      = mach_absolute_time();
779 
780 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
781 		SOCD_TRACE_XNU_START(STACKSHOT);
782 
783 		/*
784 		 * Load stackshot parameters.
785 		 */
786 		kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, since_timestamp,
787 		    pagetable_mask);
788 
789 		error = stackshot_trap();
790 
791 		/* record the duration that interupts were disabled */
792 		uint64_t time_end = mach_absolute_time();
793 
794 		/* Emit a SOCD tracepoint that we have completed the stackshot */
795 		SOCD_TRACE_XNU_END(STACKSHOT);
796 		ml_set_interrupts_enabled(prev_interrupt_state);
797 
798 		if (stackshot_duration_outer) {
799 			*stackshot_duration_outer = time_end - time_start;
800 		}
801 		tot_interrupts_off_abs += time_end - time_start;
802 
803 		if (error != KERN_SUCCESS) {
804 			if (kcdata_p != NULL) {
805 				kcdata_memory_destroy(kcdata_p);
806 				kcdata_p = NULL;
807 				stackshot_kcdata_p = NULL;
808 			}
809 			kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
810 			stackshotbuf = NULL;
811 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
812 				/*
813 				 * If we didn't allocate a big enough buffer, deallocate and try again.
814 				 */
815 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
816 				    time_end - time_start, stackshot_estimate, stackshotbuf_size);
817 				stackshot_duration_prior_abs += (time_end - time_start);
818 				continue;
819 			} else {
820 				goto error_exit;
821 			}
822 		}
823 
824 		bytes_traced = kdp_stack_snapshot_bytes_traced();
825 		if (bytes_traced <= 0) {
826 			error = KERN_ABORTED;
827 			goto error_exit;
828 		}
829 
830 		assert(bytes_traced <= stackshotbuf_size);
831 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
832 			error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
833 			goto error_exit;
834 		}
835 
836 		/*
837 		 * Save the stackshot in the kernel buffer.
838 		 */
839 		kernel_stackshot_buf = stackshotbuf;
840 		kernel_stackshot_buf_size =  bytes_traced;
841 		/*
842 		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
843 		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
844 		 * update size_to_free for kmem_free accordingly.
845 		 */
846 		size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
847 
848 		assert(size_to_free >= 0);
849 
850 		if (size_to_free != 0) {
851 			buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
852 		}
853 
854 		stackshotbuf = NULL;
855 		stackshotbuf_size = 0;
856 		goto error_exit;
857 	}
858 
859 	if (stackshotbuf_size > max_tracebuf_size) {
860 		error = KERN_RESOURCE_SHORTAGE;
861 	}
862 
863 error_exit:
864 	if (is_traced) {
865 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
866 		    error, tot_interrupts_off_abs, stackshotbuf_size, bytes_traced);
867 	}
868 	if (kcdata_p != NULL) {
869 		kcdata_memory_destroy(kcdata_p);
870 		kcdata_p = NULL;
871 		stackshot_kcdata_p = NULL;
872 	}
873 
874 	if (stackshotbuf != NULL) {
875 		kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
876 	}
877 	if (buf_to_free != NULL) {
878 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
879 	}
880 	STACKSHOT_SUBSYS_UNLOCK();
881 	return error;
882 }
883 
884 /*
885  * Cache stack snapshot parameters in preparation for a trace.
886  */
887 void
kdp_snapshot_preflight(int pid,void * tracebuf,uint32_t tracebuf_size,uint64_t flags,kcdata_descriptor_t data_p,uint64_t since_timestamp,uint32_t pagetable_mask)888 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
889     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
890 {
891 	uint64_t microsecs = 0, secs = 0;
892 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
893 
894 	stackshot_microsecs = microsecs + (secs * USEC_PER_SEC);
895 	stack_snapshot_pid = pid;
896 	stack_snapshot_buf = tracebuf;
897 	stack_snapshot_bufsize = tracebuf_size;
898 	stack_snapshot_flags = flags;
899 	stack_snapshot_delta_since_timestamp = since_timestamp;
900 	stack_snapshot_pagetable_mask = pagetable_mask;
901 
902 	panic_stackshot = ((flags & STACKSHOT_FROM_PANIC) != 0);
903 
904 	assert(data_p != NULL);
905 	assert(stackshot_kcdata_p == NULL);
906 	stackshot_kcdata_p = data_p;
907 
908 	stack_snapshot_bytes_traced = 0;
909 	stack_snapshot_bytes_uncompressed = 0;
910 }
911 
912 void
panic_stackshot_reset_state()913 panic_stackshot_reset_state()
914 {
915 	stackshot_kcdata_p = NULL;
916 }
917 
918 boolean_t
stackshot_active()919 stackshot_active()
920 {
921 	return stackshot_kcdata_p != NULL;
922 }
923 
924 uint32_t
kdp_stack_snapshot_bytes_traced(void)925 kdp_stack_snapshot_bytes_traced(void)
926 {
927 	return stack_snapshot_bytes_traced;
928 }
929 
930 uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)931 kdp_stack_snapshot_bytes_uncompressed(void)
932 {
933 	return stack_snapshot_bytes_uncompressed;
934 }
935 
936 static boolean_t
memory_iszero(void * addr,size_t size)937 memory_iszero(void *addr, size_t size)
938 {
939 	char *data = (char *)addr;
940 	for (size_t i = 0; i < size; i++) {
941 		if (data[i] != 0) {
942 			return FALSE;
943 		}
944 	}
945 	return TRUE;
946 }
947 
948 /*
949  * Keep a simple cache of the most recent validation done at a page granularity
950  * to avoid the expensive software KVA-to-phys translation in the VM.
951  */
952 
953 struct _stackshot_validation_state {
954 	vm_offset_t last_valid_page_kva;
955 	size_t last_valid_size;
956 } g_validation_state;
957 
958 static void
_stackshot_validation_reset(void)959 _stackshot_validation_reset(void)
960 {
961 	g_validation_state.last_valid_page_kva = -1;
962 	g_validation_state.last_valid_size = 0;
963 }
964 
965 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)966 _stackshot_validate_kva(vm_offset_t addr, size_t size)
967 {
968 	vm_offset_t page_addr = atop_kernel(addr);
969 	if (g_validation_state.last_valid_page_kva == page_addr &&
970 	    g_validation_state.last_valid_size <= size) {
971 		return true;
972 	}
973 
974 	if (ml_validate_nofault(addr, size)) {
975 		g_validation_state.last_valid_page_kva = page_addr;
976 		g_validation_state.last_valid_size = size;
977 		return true;
978 	}
979 	return false;
980 }
981 
/* Buffer start plus the byte count reported by kcdata_memory_get_used_bytes(). */
#define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
/* Buffer start plus the descriptor's kcd_length. */
#define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
/*
 * kcd_exit_on_error(action)
 *
 * Evaluates `action` once and stores its result in the caller's local
 * 'kern_return_t error'.  KERN_RESOURCE_SHORTAGE is rewritten to
 * KERN_INSUFFICIENT_BUFFER_SIZE, and any result other than KERN_SUCCESS
 * jumps to the caller's 'error_exit' label.
 *
 * Callers must therefore provide both the 'error' variable and the
 * 'error_exit' label.  Note that the expansion itself ends in a semicolon.
 */
#define kcd_exit_on_error(action)                      \
	do {                                               \
	        error = (action);                              \
	        if (error == KERN_RESOURCE_SHORTAGE) {         \
	                error = KERN_INSUFFICIENT_BUFFER_SIZE; \
	        }                                              \
	        if (error != KERN_SUCCESS) {                   \
	                goto error_exit;                       \
	        }                                              \
	} while (0); /* end kcd_exit_on_error */
997 
998 
999 /*
1000  * For port labels, we have a small hash table we use to track the
1001  * struct ipc_service_port_label pointers we see along the way.
1002  * This structure encapsulates the global state.
1003  *
 * The hash table is insert-only, similar to "intern"ing strings.  It's
 * only used and manipulated during the stackshot collection.  We use
 * separate chaining, with the hash elements and chains being int16_t
 * indexes into the parallel arrays, with -1 ending the chain.  Array indices are
 * allocated using a bump allocator.
1009  *
1010  * The parallel arrays contain:
1011  *      - plh_array[idx]	the pointer entered
1012  *      - plh_chains[idx]	the hash chain
1013  *      - plh_gen[idx]		the last 'generation #' seen
1014  *
1015  * Generation IDs are used to track entries looked up in the current
1016  * task; 0 is never used, and the plh_gen array is cleared to 0 on
1017  * rollover.
1018  *
1019  * The portlabel_ids we report externally are just the index in the array,
1020  * plus 1 to avoid 0 as a value.  0 is NONE, -1 is UNKNOWN (e.g. there is
1021  * one, but we ran out of space)
1022  */
struct port_label_hash {
	uint16_t                plh_size;       /* size of allocations; 0 disables tracking */
	uint16_t                plh_count;      /* count of used entries in plh_array */
	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
	int16_t                *plh_chains;    /* _size allocated; next index in the bucket's chain, -1 ends it */
	uint8_t                *plh_gen;       /* _size allocated; last 'gen #' each entry was seen in */
	int16_t                *plh_hash;      /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
	int16_t                 plh_curgen_min; /* min idx seen for this gen */
	int16_t                 plh_curgen_max; /* max idx seen for this gen */
	uint8_t                 plh_curgen;     /* current gen */
#if DEVELOPMENT || DEBUG
	/* statistics */
	uint32_t                plh_lookups;        /* # lookups or inserts */
	uint32_t                plh_found;          /* # lookups that matched an existing entry */
	uint32_t                plh_found_depth;    /* summed chain depth at each match */
	uint32_t                plh_insert;         /* # lookups that reached the insert path */
	uint32_t                plh_insert_depth;   /* summed chain depth at each insert attempt */
	uint32_t                plh_bad;            /* # inconsistencies / failed setups */
	uint32_t                plh_bad_depth;      /* summed chain depth at each inconsistency */
	uint32_t                plh_lookup_send;    /* # lookups of type SEND */
	uint32_t                plh_lookup_receive; /* # lookups of type RECEIVE */
#endif /* DEVELOPMENT || DEBUG */
};

/* The one table instance shared by all the stackshot_plh_* routines. */
struct port_label_hash port_label_hash;

/*
 * PLH_STAT_OP(...) evaluates its arguments (statistics updates) on
 * DEVELOPMENT/DEBUG kernels and expands to a no-op everywhere else.
 */
#if DEVELOPMENT || DEBUG
#define PLH_STAT_OP(...)    (void)(__VA_ARGS__)
#else /* DEVELOPMENT || DEBUG */
#define PLH_STAT_OP(...)    (void)(0)
#endif /* DEVELOPMENT || DEBUG */
1049 
/* log2 of the number of buckets in plh_hash */
#define STACKSHOT_PLH_SHIFT    7
/* largest number of entries ever tracked; 0 when kdp_ipc_have_splabel is false */
#define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
/* clamp a requested entry count to STACKSHOT_PLH_SIZE_MAX */
#define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)

/* requested number of entries: twice the number of hash buckets */
size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
1054 
1055 static size_t
stackshot_plh_est_size(void)1056 stackshot_plh_est_size(void)
1057 {
1058 	struct port_label_hash *plh = &port_label_hash;
1059 	size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
1060 
1061 	if (size == 0) {
1062 		return 0;
1063 	}
1064 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
1065 	return SIZE_EST(size * sizeof(*plh->plh_array)) +
1066 	       SIZE_EST(size * sizeof(*plh->plh_chains)) +
1067 	       SIZE_EST(size * sizeof(*plh->plh_gen)) +
1068 	       SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
1069 #undef SIZE_EST
1070 }
1071 
1072 static void
stackshot_plh_reset(void)1073 stackshot_plh_reset(void)
1074 {
1075 	port_label_hash = (struct port_label_hash){.plh_size = 0};  /* structure assignment */
1076 }
1077 
1078 static void
stackshot_plh_setup(kcdata_descriptor_t data)1079 stackshot_plh_setup(kcdata_descriptor_t data)
1080 {
1081 	struct port_label_hash plh = {
1082 		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
1083 		.plh_count = 0,
1084 		.plh_curgen = 1,
1085 		.plh_curgen_min = STACKSHOT_PLH_SIZE_MAX,
1086 		.plh_curgen_max = 0,
1087 	};
1088 	stackshot_plh_reset();
1089 	size_t size = plh.plh_size;
1090 	if (size == 0) {
1091 		return;
1092 	}
1093 	plh.plh_array = kcdata_endalloc(data, size * sizeof(*plh.plh_array));
1094 	plh.plh_chains = kcdata_endalloc(data, size * sizeof(*plh.plh_chains));
1095 	plh.plh_gen = kcdata_endalloc(data, size * sizeof(*plh.plh_gen));
1096 	plh.plh_hash = kcdata_endalloc(data, (1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash));
1097 	if (plh.plh_array == NULL || plh.plh_chains == NULL || plh.plh_gen == NULL || plh.plh_hash == NULL) {
1098 		PLH_STAT_OP(port_label_hash.plh_bad++);
1099 		return;
1100 	}
1101 	for (int x = 0; x < size; x++) {
1102 		plh.plh_array[x] = NULL;
1103 		plh.plh_chains[x] = -1;
1104 		plh.plh_gen[x] = 0;
1105 	}
1106 	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
1107 		plh.plh_hash[x] = -1;
1108 	}
1109 	port_label_hash = plh;  /* structure assignment */
1110 }
1111 
1112 static int16_t
stackshot_plh_hash(struct ipc_service_port_label * ispl)1113 stackshot_plh_hash(struct ipc_service_port_label *ispl)
1114 {
1115 	uintptr_t ptr = (uintptr_t)ispl;
1116 	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
1117 #define PLH_HASH_STEP(ptr, x) \
1118 	    ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
1119 	ptr ^= PLH_HASH_STEP(ptr, 16);
1120 	ptr ^= PLH_HASH_STEP(ptr, 8);
1121 	ptr ^= PLH_HASH_STEP(ptr, 4);
1122 	ptr ^= PLH_HASH_STEP(ptr, 2);
1123 	ptr ^= PLH_HASH_STEP(ptr, 1);
1124 #undef PLH_HASH_STEP
1125 	return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
1126 }
1127 
/*
 * Kind of lookup being performed; stackshot_plh_lookup() uses it only to
 * pick which statistics counter to bump.
 */
enum stackshot_plh_lookup_type {
	STACKSHOT_PLH_LOOKUP_UNKNOWN = 0,   /* no per-type counter is updated */
	STACKSHOT_PLH_LOOKUP_SEND    = 1,   /* counted in plh_lookup_send */
	STACKSHOT_PLH_LOOKUP_RECEIVE = 2,   /* counted in plh_lookup_receive */
};
1133 
1134 static void
stackshot_plh_resetgen(void)1135 stackshot_plh_resetgen(void)
1136 {
1137 	struct port_label_hash *plh = &port_label_hash;
1138 	if (plh->plh_curgen_min == STACKSHOT_PLH_SIZE_MAX && plh->plh_curgen_max == 0) {
1139 		return;  // no lookups, nothing using the current generation
1140 	}
1141 	plh->plh_curgen++;
1142 	plh->plh_curgen_min = STACKSHOT_PLH_SIZE_MAX;
1143 	plh->plh_curgen_max = 0;
1144 	if (plh->plh_curgen == 0) { // wrapped, zero the array and increment the generation
1145 		for (int x = 0; x < plh->plh_size; x++) {
1146 			plh->plh_gen[x] = 0;
1147 		}
1148 		plh->plh_curgen = 1;
1149 	}
1150 }
1151 
1152 static int16_t
stackshot_plh_lookup(struct ipc_service_port_label * ispl,enum stackshot_plh_lookup_type type)1153 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
1154 {
1155 	struct port_label_hash *plh = &port_label_hash;
1156 	int depth;
1157 	int16_t cur;
1158 	if (ispl == NULL) {
1159 		return STACKSHOT_PORTLABELID_NONE;
1160 	}
1161 	switch (type) {
1162 	case STACKSHOT_PLH_LOOKUP_SEND:
1163 		PLH_STAT_OP(plh->plh_lookup_send++);
1164 		break;
1165 	case STACKSHOT_PLH_LOOKUP_RECEIVE:
1166 		PLH_STAT_OP(plh->plh_lookup_receive++);
1167 		break;
1168 	default:
1169 		break;
1170 	}
1171 	PLH_STAT_OP(plh->plh_lookups++);
1172 	if (plh->plh_size == 0) {
1173 		return STACKSHOT_PORTLABELID_MISSING;
1174 	}
1175 	int16_t hash = stackshot_plh_hash(ispl);
1176 	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
1177 	depth = 0;
1178 	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
1179 		/* cur must be in-range, and chain depth can never be above our # allocated */
1180 		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
1181 			PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
1182 			return STACKSHOT_PORTLABELID_MISSING;
1183 		}
1184 		assert(cur < plh->plh_count);
1185 		if (plh->plh_array[cur] == ispl) {
1186 			PLH_STAT_OP((plh->plh_found++), (plh->plh_found_depth += depth));
1187 			goto found;
1188 		}
1189 		depth++;
1190 	}
1191 	/* not found in hash table, so alloc and insert it */
1192 	if (cur != -1) {
1193 		PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
1194 		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
1195 	}
1196 	PLH_STAT_OP((plh->plh_insert++), (plh->plh_insert_depth += depth));
1197 	if (plh->plh_count >= plh->plh_size) {
1198 		return STACKSHOT_PORTLABELID_MISSING; /* no space */
1199 	}
1200 	cur = plh->plh_count;
1201 	plh->plh_count++;
1202 	plh->plh_array[cur] = ispl;
1203 	plh->plh_chains[cur] = plh->plh_hash[hash];
1204 	plh->plh_hash[hash] = cur;
1205 found:
1206 	plh->plh_gen[cur] = plh->plh_curgen;
1207 	if (plh->plh_curgen_min > cur) {
1208 		plh->plh_curgen_min = cur;
1209 	}
1210 	if (plh->plh_curgen_max < cur) {
1211 		plh->plh_curgen_max = cur;
1212 	}
1213 	return cur + 1;   /* offset to avoid 0 */
1214 }
1215 
1216 // record any PLH referenced since the last stackshot_plh_resetgen() call
1217 static kern_return_t
kdp_stackshot_plh_record(void)1218 kdp_stackshot_plh_record(void)
1219 {
1220 	kern_return_t error = KERN_SUCCESS;
1221 	struct port_label_hash *plh = &port_label_hash;
1222 	uint16_t count = plh->plh_count;
1223 	uint8_t curgen = plh->plh_curgen;
1224 	int16_t curgen_min = plh->plh_curgen_min;
1225 	int16_t curgen_max = plh->plh_curgen_max;
1226 	if (curgen_min <= curgen_max && curgen_max < count &&
1227 	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
1228 		struct ipc_service_port_label **arr = plh->plh_array;
1229 		size_t ispl_size, max_namelen;
1230 		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
1231 		for (int idx = curgen_min; idx <= curgen_max; idx++) {
1232 			struct ipc_service_port_label *ispl = arr[idx];
1233 			struct portlabel_info spl = {
1234 				.portlabel_id = (idx + 1),
1235 			};
1236 			const char *name = NULL;
1237 			long name_sz = 0;
1238 			if (plh->plh_gen[idx] != curgen) {
1239 				continue;
1240 			}
1241 			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
1242 				kdp_ipc_fill_splabel(ispl, &spl, &name);
1243 			}
1244 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
1245 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
1246 			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) {   /* validates the kva */
1247 				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
1248 			} else {
1249 				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
1250 			}
1251 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
1252 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
1253 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
1254 		}
1255 	}
1256 
1257 error_exit:
1258 	return error;
1259 }
1260 
#if DEVELOPMENT || DEBUG
/*
 * Write the port-label hash statistics to stackshot_kcdata_p, one
 * described uint32 per counter, skipping counters that are zero.
 *
 * Returns KERN_SUCCESS, or the (remapped) error from the first kcdata call
 * that failed.
 */
static kern_return_t
kdp_stackshot_plh_stats(void)
{
	kern_return_t error = KERN_SUCCESS;
	struct port_label_hash *plh = &port_label_hash;
	const struct {
		uint32_t    value;
		const char *label;
	} stats[] = {
		{ plh->plh_size,           "stackshot_plh_size" },
		{ plh->plh_lookups,        "stackshot_plh_lookups" },
		{ plh->plh_found,          "stackshot_plh_found" },
		{ plh->plh_found_depth,    "stackshot_plh_found_depth" },
		{ plh->plh_insert,         "stackshot_plh_insert" },
		{ plh->plh_insert_depth,   "stackshot_plh_insert_depth" },
		{ plh->plh_bad,            "stackshot_plh_bad" },
		{ plh->plh_bad_depth,      "stackshot_plh_bad_depth" },
		{ plh->plh_lookup_send,    "stackshot_plh_lookup_send" },
		{ plh->plh_lookup_receive, "stackshot_plh_lookup_receive" },
	};

	for (size_t i = 0; i < sizeof(stats) / sizeof(stats[0]); i++) {
		if (stats[i].value == 0) {
			continue;
		}
		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stats[i].value, stats[i].label));
	}

error_exit:
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
1287 
1288 static uint64_t
kcdata_get_task_ss_flags(task_t task)1289 kcdata_get_task_ss_flags(task_t task)
1290 {
1291 	uint64_t ss_flags = 0;
1292 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
1293 
1294 	if (task_64bit_addr) {
1295 		ss_flags |= kUser64_p;
1296 	}
1297 	if (!task->active || task_is_a_corpse(task) || proc_exiting(task->bsd_info)) {
1298 		ss_flags |= kTerminatedSnapshot;
1299 	}
1300 	if (task->pidsuspended) {
1301 		ss_flags |= kPidSuspended;
1302 	}
1303 	if (task->frozen) {
1304 		ss_flags |= kFrozen;
1305 	}
1306 	if (task->effective_policy.tep_darwinbg == 1) {
1307 		ss_flags |= kTaskDarwinBG;
1308 	}
1309 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
1310 		ss_flags |= kTaskIsForeground;
1311 	}
1312 	if (task->requested_policy.trp_boosted == 1) {
1313 		ss_flags |= kTaskIsBoosted;
1314 	}
1315 	if (task->effective_policy.tep_sup_active == 1) {
1316 		ss_flags |= kTaskIsSuppressed;
1317 	}
1318 #if CONFIG_MEMORYSTATUS
1319 
1320 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
1321 	memorystatus_proc_flags_unsafe(task->bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
1322 	if (dirty) {
1323 		ss_flags |= kTaskIsDirty;
1324 	}
1325 	if (dirty_tracked) {
1326 		ss_flags |= kTaskIsDirtyTracked;
1327 	}
1328 	if (allow_idle_exit) {
1329 		ss_flags |= kTaskAllowIdleExit;
1330 	}
1331 
1332 #endif
1333 	if (task->effective_policy.tep_tal_engaged) {
1334 		ss_flags |= kTaskTALEngaged;
1335 	}
1336 
1337 	ss_flags |= (0x7 & workqueue_get_pwq_state_kdp(task->bsd_info)) << 17;
1338 
1339 #if IMPORTANCE_INHERITANCE
1340 	if (task->task_imp_base) {
1341 		if (task->task_imp_base->iit_donor) {
1342 			ss_flags |= kTaskIsImpDonor;
1343 		}
1344 		if (task->task_imp_base->iit_live_donor) {
1345 			ss_flags |= kTaskIsLiveImpDonor;
1346 		}
1347 	}
1348 #endif
1349 	return ss_flags;
1350 }
1351 
1352 static kern_return_t
kcdata_record_shared_cache_info(kcdata_descriptor_t kcd,task_t task,unaligned_u64 * task_snap_ss_flags)1353 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
1354 {
1355 	kern_return_t error = KERN_SUCCESS;
1356 
1357 	uint64_t shared_cache_slide = 0;
1358 	uint64_t shared_cache_first_mapping = 0;
1359 	uint32_t kdp_fault_results = 0;
1360 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
1361 
1362 
1363 	assert(task_snap_ss_flags != NULL);
1364 
1365 	/* Get basic info about the shared region pointer, regardless of any failures */
1366 	if (task->shared_region == NULL) {
1367 		*task_snap_ss_flags |= kTaskSharedRegionNone;
1368 	} else if (task->shared_region == primary_system_shared_region) {
1369 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
1370 	} else {
1371 		*task_snap_ss_flags |= kTaskSharedRegionOther;
1372 	}
1373 
1374 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
1375 		struct vm_shared_region *sr = task->shared_region;
1376 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
1377 
1378 	} else {
1379 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
1380 		goto error_exit;
1381 	}
1382 
1383 	/* We haven't copied in the shared region UUID yet as part of setup */
1384 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
1385 		goto error_exit;
1386 	}
1387 
1388 
1389 	/*
1390 	 * No refcounting here, but we are in debugger context, so that should be safe.
1391 	 */
1392 	shared_cache_slide = task->shared_region->sr_slide;
1393 
1394 	if (task->shared_region == primary_system_shared_region) {
1395 		/* skip adding shared cache info -- it's the same as the system level one */
1396 		goto error_exit;
1397 	}
1398 
1399 	/*
1400 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
1401 	 * naming of both the structure and fields for this use wasn't great.  The
1402 	 * dyld_shared_cache_loadinfo structure has better names, but the same
1403 	 * layout and content as the original.
1404 	 *
1405 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
1406 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
1407 	 * entries; here, it's the slid first mapping, and we leave it that way
1408 	 * for backwards compatibility.
1409 	 */
1410 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
1411 	stackshot_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
1412 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
1413 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
1414 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
1415 
1416 error_exit:
1417 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1418 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
1419 	}
1420 
1421 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1422 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1423 	}
1424 
1425 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1426 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1427 	}
1428 
1429 	return error;
1430 }
1431 
1432 static kern_return_t
kcdata_record_uuid_info(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 * task_snap_ss_flags)1433 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
1434 {
1435 	boolean_t save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
1436 	boolean_t save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
1437 	boolean_t should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
1438 
1439 	kern_return_t error        = KERN_SUCCESS;
1440 	mach_vm_address_t out_addr = 0;
1441 
1442 	uint32_t uuid_info_count         = 0;
1443 	mach_vm_address_t uuid_info_addr = 0;
1444 	uint64_t uuid_info_timestamp     = 0;
1445 	uint32_t kdp_fault_results       = 0;
1446 
1447 
1448 	assert(task_snap_ss_flags != NULL);
1449 
1450 	int task_pid     = pid_from_task(task);
1451 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
1452 
1453 	if (save_loadinfo_p && have_pmap && task->active && task_pid > 0) {
1454 		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
1455 		if (task_64bit_addr) {
1456 			struct user64_dyld_all_image_infos task_image_infos;
1457 			if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1458 			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1459 				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
1460 				uuid_info_addr = task_image_infos.uuidArray;
1461 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1462 					uuid_info_timestamp = task_image_infos.timestamp;
1463 				}
1464 
1465 			}
1466 		} else {
1467 			struct user32_dyld_all_image_infos task_image_infos;
1468 			if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1469 			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1470 				uuid_info_count = task_image_infos.uuidArrayCount;
1471 				uuid_info_addr = task_image_infos.uuidArray;
1472 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1473 					uuid_info_timestamp = task_image_infos.timestamp;
1474 				}
1475 			}
1476 		}
1477 
1478 		/*
1479 		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
1480 		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
1481 		 * for this task.
1482 		 */
1483 		if (!uuid_info_addr) {
1484 			uuid_info_count = 0;
1485 		}
1486 
1487 
1488 	}
1489 
1490 	if (have_pmap && task_pid == 0) {
1491 		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
1492 			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
1493 		} else {
1494 			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
1495 		}
1496 	}
1497 
1498 	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
1499 		uint32_t copied_uuid_count = 0;
1500 		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
1501 		uint32_t uuid_info_array_size = 0;
1502 
1503 		/* Open a compression window to avoid overflowing the stack */
1504 		kcdata_compression_window_open(kcd);
1505 
1506 		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
1507 		if (uuid_info_count > 0) {
1508 			uuid_info_array_size = uuid_info_count * uuid_info_size;
1509 
1510 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1511 			    uuid_info_size, uuid_info_count, &out_addr));
1512 
1513 			if (!kdp_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
1514 				bzero((void *)out_addr, uuid_info_array_size);
1515 			} else {
1516 				copied_uuid_count = uuid_info_count;
1517 			}
1518 		}
1519 
1520 		uuid_t binary_uuid;
1521 		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
1522 			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
1523 			if (uuid_info_array_size == 0) {
1524 				/* We just need to store one UUID */
1525 				uuid_info_array_size = uuid_info_size;
1526 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1527 				    uuid_info_size, 1, &out_addr));
1528 			}
1529 
1530 			if (task_64bit_addr) {
1531 				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
1532 				uint64_t image_load_address = task->mach_header_vm_address;
1533 
1534 				stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1535 				stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1536 			} else {
1537 				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
1538 				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
1539 
1540 				stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1541 				stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1542 			}
1543 		}
1544 
1545 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
1546 	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
1547 		uintptr_t image_load_address;
1548 
1549 		do {
1550 #if defined(__arm__) || defined(__arm64__)
1551 			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
1552 				struct dyld_uuid_info_64 kc_uuid = {0};
1553 				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1554 				stackshot_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
1555 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
1556 				break;
1557 			}
1558 #endif /* defined(__arm__) || defined(__arm64__) */
1559 
1560 			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
1561 				/* Kernel UUID not found or inaccessible */
1562 				break;
1563 			}
1564 
1565 			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
1566 			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
1567 				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
1568 #if  defined(__arm64__)
1569 				kc_format_t primary_kc_type = KCFormatUnknown;
1570 				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
1571 					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
1572 					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
1573 				}
1574 #endif
1575 			}
1576 
1577 			/*
1578 			 * The element count of the array can vary - avoid overflowing the
1579 			 * stack by opening a window.
1580 			 */
1581 			kcdata_compression_window_open(kcd);
1582 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
1583 			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
1584 			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
1585 
1586 			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
1587 #if defined(__arm64__)
1588 			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
1589 				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
1590 				extern vm_offset_t segTEXTEXECB;
1591 				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
1592 			}
1593 #endif
1594 			uuid_info_array[0].imageLoadAddress = image_load_address;
1595 			stackshot_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
1596 
1597 			if (save_kextloadinfo_p &&
1598 			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
1599 			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
1600 			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
1601 				uint32_t kexti;
1602 				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
1603 					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
1604 #if defined(__arm64__)
1605 					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
1606 						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
1607 						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
1608 					}
1609 #endif
1610 					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
1611 					stackshot_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
1612 				}
1613 			}
1614 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
1615 		} while (0);
1616 	}
1617 
1618 error_exit:
1619 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1620 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
1621 	}
1622 
1623 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1624 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1625 	}
1626 
1627 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1628 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1629 	}
1630 
1631 	return error;
1632 }
1633 
1634 static kern_return_t
kcdata_record_task_iostats(kcdata_descriptor_t kcd,task_t task)1635 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
1636 {
1637 	kern_return_t error = KERN_SUCCESS;
1638 	mach_vm_address_t out_addr = 0;
1639 
1640 	/* I/O Statistics if any counters are non zero */
1641 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1642 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
1643 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
1644 		kcdata_compression_window_open(kcd);
1645 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1646 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1647 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
1648 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
1649 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
1650 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
1651 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
1652 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
1653 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
1654 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
1655 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
1656 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
1657 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
1658 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
1659 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1660 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
1661 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
1662 		}
1663 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
1664 	}
1665 
1666 
1667 error_exit:
1668 	return error;
1669 }
1670 
#if MONOTONIC
/*
 * Push a STACKSHOT_KCTYPE_INSTRS_CYCLES item holding the instruction and
 * cycle counts that mt_stackshot_task() reports for the task.
 *
 * Returns the result of kcdata_push_data().
 */
static kern_return_t
kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
{
	uint64_t task_instrs;
	uint64_t task_cycles;
	struct instrs_cycles_snapshot snap = {0};

	/* Sample into locals first, then copy into the record that gets pushed. */
	mt_stackshot_task(task, &task_instrs, &task_cycles);
	snap.ics_instructions = task_instrs;
	snap.ics_cycles = task_cycles;

	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(snap), &snap);
}
#endif /* MONOTONIC */
1686 
1687 static kern_return_t
kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd,task_t task)1688 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
1689 {
1690 	struct stackshot_cpu_architecture cpu_architecture = {0};
1691 	int32_t cputype;
1692 	int32_t cpusubtype;
1693 
1694 	proc_archinfo_kdp(task->bsd_info, &cputype, &cpusubtype);
1695 	cpu_architecture.cputype = cputype;
1696 	cpu_architecture.cpusubtype = cpusubtype;
1697 
1698 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
1699 }
1700 
1701 static kern_return_t
kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd,task_t task,unaligned_u64 task_snap_ss_flags,uint64_t transition_type)1702 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
1703 {
1704 	kern_return_t error                 = KERN_SUCCESS;
1705 	mach_vm_address_t out_addr          = 0;
1706 	struct transitioning_task_snapshot * cur_tsnap = NULL;
1707 
1708 	int task_pid           = pid_from_task(task);
1709 	/* Is returning -1 ok for terminating task ok ??? */
1710 	uint64_t task_uniqueid = get_task_uniqueid(task);
1711 
1712 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
1713 		/*
1714 		 * if this task is a transit task from another one, show the pid as
1715 		 * negative
1716 		 */
1717 		task_pid = 0 - task_pid;
1718 	}
1719 
1720 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
1721 	kcdata_compression_window_open(kcd);
1722 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
1723 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
1724 	bzero(cur_tsnap, sizeof(*cur_tsnap));
1725 
1726 	cur_tsnap->tts_unique_pid = task_uniqueid;
1727 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
1728 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
1729 	cur_tsnap->tts_transition_type = transition_type;
1730 	cur_tsnap->tts_pid = task_pid;
1731 
1732 	/* Add the BSD process identifiers */
1733 	if (task_pid != -1 && task->bsd_info != NULL) {
1734 		proc_name_kdp(task->bsd_info, cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
1735 	} else {
1736 		cur_tsnap->tts_p_comm[0] = '\0';
1737 	}
1738 
1739 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
1740 
1741 error_exit:
1742 	return error;
1743 }
1744 
1745 static kern_return_t
1746 #if STACKSHOT_COLLECTS_LATENCY_INFO
kcdata_record_task_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags,struct stackshot_latency_task * latency_info)1747 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
1748 #else
1749 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
1750 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1751 {
1752 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1753 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
1754 #if MONOTONIC
1755 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1756 #endif /* MONOTONIC */
1757 #if __arm__ || __arm64__
1758 	boolean_t collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
1759 #endif
1760 	boolean_t collect_pagetables       = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
1761 
1762 
1763 	kern_return_t error                 = KERN_SUCCESS;
1764 	mach_vm_address_t out_addr          = 0;
1765 	struct task_snapshot_v2 * cur_tsnap = NULL;
1766 #if STACKSHOT_COLLECTS_LATENCY_INFO
1767 	latency_info->cur_tsnap_latency = mach_absolute_time();
1768 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1769 
1770 	int task_pid           = pid_from_task(task);
1771 	uint64_t task_uniqueid = get_task_uniqueid(task);
1772 	uint64_t proc_starttime_secs = 0;
1773 
1774 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
1775 		/*
1776 		 * if this task is a transit task from another one, show the pid as
1777 		 * negative
1778 		 */
1779 		task_pid = 0 - task_pid;
1780 	}
1781 
1782 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
1783 	kcdata_compression_window_open(kcd);
1784 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
1785 	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
1786 	bzero(cur_tsnap, sizeof(*cur_tsnap));
1787 
1788 	cur_tsnap->ts_unique_pid = task_uniqueid;
1789 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
1790 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
1791 	cur_tsnap->ts_user_time_in_terminated_threads = task->total_user_time;
1792 	cur_tsnap->ts_system_time_in_terminated_threads = task->total_system_time;
1793 
1794 	proc_starttime_kdp(task->bsd_info, &proc_starttime_secs, NULL, NULL);
1795 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
1796 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1797 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
1798 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
1799 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
1800 
1801 	cur_tsnap->ts_suspend_count = task->suspend_count;
1802 	cur_tsnap->ts_faults = counter_load(&task->faults);
1803 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
1804 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
1805 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
1806 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1807 	cur_tsnap->ts_pid = task_pid;
1808 
1809 	/* Add the BSD process identifiers */
1810 	if (task_pid != -1 && task->bsd_info != NULL) {
1811 		proc_name_kdp(task->bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
1812 	} else {
1813 		cur_tsnap->ts_p_comm[0] = '\0';
1814 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
1815 		if (task->task_imp_base != NULL) {
1816 			_stackshot_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
1817 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
1818 		}
1819 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
1820 	}
1821 
1822 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
1823 
1824 #if CONFIG_COALITIONS
1825 	if (task_pid != -1 && task->bsd_info != NULL &&
1826 	    ((trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) && (task->coalition[COALITION_TYPE_JETSAM] != NULL))) {
1827 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
1828 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
1829 	}
1830 #endif /* CONFIG_COALITIONS */
1831 
1832 #if __arm__ || __arm64__
1833 	if (collect_asid && have_pmap) {
1834 		uint32_t asid = PMAP_VASID(task->map->pmap);
1835 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
1836 	}
1837 #endif
1838 
1839 #if STACKSHOT_COLLECTS_LATENCY_INFO
1840 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
1841 	latency_info->pmap_latency = mach_absolute_time();
1842 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1843 
1844 	if (collect_pagetables && have_pmap) {
1845 #if INTERRUPT_MASKED_DEBUG
1846 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
1847 		ml_spin_debug_clear_self();
1848 #endif
1849 		size_t bytes_dumped = 0;
1850 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stack_snapshot_pagetable_mask, &bytes_dumped);
1851 		if (error != KERN_SUCCESS) {
1852 			goto error_exit;
1853 		} else {
1854 			/* Variable size array - better not have it on the stack. */
1855 			kcdata_compression_window_open(kcd);
1856 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
1857 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
1858 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
1859 		}
1860 	}
1861 
1862 #if STACKSHOT_COLLECTS_LATENCY_INFO
1863 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
1864 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
1865 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1866 
1867 #if STACKSHOT_COLLECTS_LATENCY_INFO
1868 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
1869 	latency_info->end_latency = mach_absolute_time();
1870 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1871 
1872 	if (collect_iostats) {
1873 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
1874 	}
1875 
1876 #if MONOTONIC
1877 	if (collect_instrs_cycles) {
1878 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1879 	}
1880 #endif /* MONOTONIC */
1881 
1882 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
1883 
1884 #if STACKSHOT_COLLECTS_LATENCY_INFO
1885 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
1886 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1887 
1888 error_exit:
1889 	return error;
1890 }
1891 
1892 static kern_return_t
kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags)1893 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
1894 {
1895 #if !MONOTONIC
1896 #pragma unused(trace_flags)
1897 #endif /* !MONOTONIC */
1898 	kern_return_t error                       = KERN_SUCCESS;
1899 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
1900 	mach_vm_address_t out_addr                = 0;
1901 	(void) trace_flags;
1902 #if __arm__ || __arm64__
1903 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
1904 #endif
1905 #if MONOTONIC
1906 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1907 #endif /* MONOTONIC */
1908 
1909 	uint64_t task_uniqueid = get_task_uniqueid(task);
1910 
1911 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
1912 
1913 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
1914 
1915 	cur_tsnap->tds_unique_pid = task_uniqueid;
1916 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
1917 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
1918 
1919 	cur_tsnap->tds_user_time_in_terminated_threads = task->total_user_time;
1920 	cur_tsnap->tds_system_time_in_terminated_threads = task->total_system_time;
1921 
1922 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1923 
1924 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
1925 	cur_tsnap->tds_suspend_count = task->suspend_count;
1926 	cur_tsnap->tds_faults            = counter_load(&task->faults);
1927 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
1928 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
1929 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
1930 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
1931 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
1932 	    ? LATENCY_QOS_TIER_UNSPECIFIED
1933 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1934 
1935 #if __arm__ || __arm64__
1936 	if (collect_asid && have_pmap) {
1937 		uint32_t asid = PMAP_VASID(task->map->pmap);
1938 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
1939 		stackshot_memcpy((void*)out_addr, &asid, sizeof(asid));
1940 	}
1941 #endif
1942 
1943 #if MONOTONIC
1944 	if (collect_instrs_cycles) {
1945 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1946 	}
1947 #endif /* MONOTONIC */
1948 
1949 error_exit:
1950 	return error;
1951 }
1952 
1953 static kern_return_t
kcdata_record_thread_iostats(kcdata_descriptor_t kcd,thread_t thread)1954 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
1955 {
1956 	kern_return_t error = KERN_SUCCESS;
1957 	mach_vm_address_t out_addr = 0;
1958 
1959 	/* I/O Statistics */
1960 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1961 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
1962 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1963 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1964 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
1965 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
1966 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
1967 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
1968 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
1969 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
1970 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
1971 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
1972 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
1973 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
1974 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
1975 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
1976 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1977 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
1978 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
1979 		}
1980 	}
1981 
1982 error_exit:
1983 	return error;
1984 }
1985 
1986 bool
machine_trace_thread_validate_kva(vm_offset_t addr)1987 machine_trace_thread_validate_kva(vm_offset_t addr)
1988 {
1989 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
1990 }
1991 
/*
 * State handed to _stackshot_backtrace_copy() through its opaque context
 * pointer.  It carries the map being read, a one-entry cache of the last
 * page translation, and the backtrace flags accumulated along the way.
 */
1992 struct _stackshot_backtrace_context {
1993 	vm_map_t sbc_map; /* map passed to _stackshot_get_page_size() and kdp_find_phys() */
1994 	vm_offset_t sbc_prev_page; /* page-aligned source address of the last translation */
1995 	vm_offset_t sbc_prev_kva; /* page-aligned kernel virtual address that page was found at */
1996 	uint32_t sbc_flags; /* kThread*BT flags derived from kdp_find_phys() results */
1997 	bool sbc_allow_faulting; /* forwarded to kdp_find_phys() */
1998 };
1999 
2000 static errno_t
_stackshot_backtrace_copy(void * vctx,void * dst,user_addr_t src,size_t size)2001 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
2002 {
2003 	struct _stackshot_backtrace_context *ctx = vctx;
2004 	size_t map_page_mask = 0;
2005 	size_t __assert_only map_page_size = _stackshot_get_page_size(ctx->sbc_map,
2006 	    &map_page_mask);
2007 	assert(size < map_page_size);
2008 	if (src & (size - 1)) {
2009 		// The source should be aligned to the size passed in, like a stack
2010 		// frame or word.
2011 		return EINVAL;
2012 	}
2013 
2014 	vm_offset_t src_page = src & ~map_page_mask;
2015 	vm_offset_t src_kva = 0;
2016 
2017 	if (src_page != ctx->sbc_prev_page) {
2018 		uint32_t res = 0;
2019 		uint32_t flags = 0;
2020 		vm_offset_t src_pa = kdp_find_phys(ctx->sbc_map, src,
2021 		    ctx->sbc_allow_faulting, &res);
2022 
2023 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
2024 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
2025 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
2026 		ctx->sbc_flags |= flags;
2027 		if (src_pa == 0) {
2028 			return EFAULT;
2029 		}
2030 
2031 		src_kva = phystokv(src_pa);
2032 		ctx->sbc_prev_page = src_page;
2033 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
2034 	} else {
2035 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
2036 	}
2037 
2038 #if KASAN
2039 	kasan_notify_address(src_kva, size);
2040 #endif
2041 	memcpy(dst, (const void *)src_kva, size);
2042 
2043 	return 0;
2044 }
2045 
2046 static kern_return_t
kcdata_record_thread_snapshot(kcdata_descriptor_t kcd,thread_t thread,task_t task,uint64_t trace_flags,boolean_t have_pmap,boolean_t thread_on_core)2047 kcdata_record_thread_snapshot(
2048 	kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
2049 {
2050 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
2051 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
2052 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2053 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
2054 #if MONOTONIC
2055 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
2056 #endif /* MONOTONIC */
2057 	kern_return_t error        = KERN_SUCCESS;
2058 
2059 #if STACKSHOT_COLLECTS_LATENCY_INFO
2060 	struct stackshot_latency_thread latency_info;
2061 	latency_info.cur_thsnap1_latency = mach_absolute_time();
2062 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2063 
2064 	mach_vm_address_t out_addr = 0;
2065 	int saved_count            = 0;
2066 
2067 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
2068 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
2069 	uint64_t tval    = 0;
2070 
2071 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
2072 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
2073 
2074 	/* Populate the thread snapshot header */
2075 	cur_thread_snap->ths_ss_flags = 0;
2076 	cur_thread_snap->ths_thread_id = thread_tid(thread);
2077 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
2078 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
2079 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
2080 
2081 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
2082 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
2083 	} else {
2084 		cur_thread_snap->ths_voucher_identifier = 0;
2085 	}
2086 
2087 #if STACKSHOT_COLLECTS_LATENCY_INFO
2088 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
2089 	latency_info.dispatch_serial_latency = mach_absolute_time();
2090 	latency_info.dispatch_label_latency = 0;
2091 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2092 
2093 	cur_thread_snap->ths_dqserialnum = 0;
2094 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
2095 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
2096 		if (dqkeyaddr != 0) {
2097 			uint64_t dqaddr = 0;
2098 			boolean_t copyin_ok = kdp_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
2099 			if (copyin_ok && dqaddr != 0) {
2100 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
2101 				uint64_t dqserialnum = 0;
2102 				copyin_ok = kdp_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
2103 				if (copyin_ok) {
2104 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
2105 					cur_thread_snap->ths_dqserialnum = dqserialnum;
2106 				}
2107 
2108 #if STACKSHOT_COLLECTS_LATENCY_INFO
2109 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
2110 				latency_info.dispatch_label_latency = mach_absolute_time();
2111 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2112 
2113 				/* try copying in the queue label */
2114 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
2115 				if (label_offs) {
2116 					uint64_t dqlabeladdr = dqaddr + label_offs;
2117 					uint64_t actual_dqlabeladdr = 0;
2118 
2119 					copyin_ok = kdp_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
2120 					if (copyin_ok && actual_dqlabeladdr != 0) {
2121 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
2122 						int len;
2123 
2124 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
2125 						len = kdp_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
2126 						if (len > 0) {
2127 							mach_vm_address_t label_addr = 0;
2128 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
2129 							_stackshot_strlcpy((char*)label_addr, &label_buf[0], len);
2130 						}
2131 					}
2132 				}
2133 #if STACKSHOT_COLLECTS_LATENCY_INFO
2134 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
2135 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2136 			}
2137 		}
2138 	}
2139 
2140 #if STACKSHOT_COLLECTS_LATENCY_INFO
2141 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
2142 		latency_info.dispatch_serial_latency = 0;
2143 	}
2144 	latency_info.cur_thsnap2_latency = mach_absolute_time();
2145 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2146 
2147 	tval = safe_grab_timer_value(&thread->user_timer);
2148 	cur_thread_snap->ths_user_time = tval;
2149 	tval = safe_grab_timer_value(&thread->system_timer);
2150 
2151 	if (thread->precise_user_kernel_time) {
2152 		cur_thread_snap->ths_sys_time = tval;
2153 	} else {
2154 		cur_thread_snap->ths_user_time += tval;
2155 		cur_thread_snap->ths_sys_time = 0;
2156 	}
2157 
2158 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
2159 		cur_thread_snap->ths_ss_flags |= kThreadMain;
2160 	}
2161 	if (thread->effective_policy.thep_darwinbg) {
2162 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
2163 	}
2164 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
2165 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
2166 	}
2167 	if (thread->suspend_count > 0) {
2168 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
2169 	}
2170 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
2171 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
2172 	}
2173 	if (thread_on_core) {
2174 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
2175 	}
2176 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
2177 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
2178 	}
2179 
2180 	/* make sure state flags defined in kcdata.h still match internal flags */
2181 	static_assert(SS_TH_WAIT == TH_WAIT);
2182 	static_assert(SS_TH_SUSP == TH_SUSP);
2183 	static_assert(SS_TH_RUN == TH_RUN);
2184 	static_assert(SS_TH_UNINT == TH_UNINT);
2185 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
2186 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
2187 	static_assert(SS_TH_IDLE == TH_IDLE);
2188 
2189 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
2190 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
2191 	cur_thread_snap->ths_state                   = thread->state;
2192 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
2193 	cur_thread_snap->ths_base_priority = thread->base_pri;
2194 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
2195 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
2196 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
2197 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
2198 	    thread->requested_policy.thrp_qos_workq_override);
2199 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
2200 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
2201 
2202 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
2203 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
2204 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
2205 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
2206 
2207 #if STACKSHOT_COLLECTS_LATENCY_INFO
2208 	latency_info.cur_thsnap2_latency = mach_absolute_time()  - latency_info.cur_thsnap2_latency;
2209 	latency_info.thread_name_latency = mach_absolute_time();
2210 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2211 
2212 	/* if there is thread name then add to buffer */
2213 	cur_thread_name[0] = '\0';
2214 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
2215 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
2216 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
2217 		stackshot_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
2218 	}
2219 
2220 #if STACKSHOT_COLLECTS_LATENCY_INFO
2221 	latency_info.thread_name_latency = mach_absolute_time()  - latency_info.thread_name_latency;
2222 	latency_info.sur_times_latency = mach_absolute_time();
2223 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2224 
2225 	/* record system, user, and runnable times */
2226 	time_value_t user_time, system_time, runnable_time;
2227 	thread_read_times(thread, &user_time, &system_time, &runnable_time);
2228 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
2229 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
2230 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
2231 		.user_usec = (uint64_t)user_time.seconds * USEC_PER_SEC + user_time.microseconds,
2232 		.system_usec = (uint64_t)system_time.seconds * USEC_PER_SEC + system_time.microseconds,
2233 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
2234 	};
2235 
2236 #if STACKSHOT_COLLECTS_LATENCY_INFO
2237 	latency_info.sur_times_latency = mach_absolute_time()  - latency_info.sur_times_latency;
2238 	latency_info.user_stack_latency = mach_absolute_time();
2239 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2240 
2241 	/* Trace user stack, if any */
2242 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
2243 		uint32_t user_ths_ss_flags = 0;
2244 
2245 		/*
2246 		 * This relies on knowing the "end" address points to the start of the
2247 		 * next elements data and, in the case of arrays, the elements.
2248 		 */
2249 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2250 		mach_vm_address_t max_addr = (mach_vm_address_t)kcd_max_address(kcd);
2251 		assert(out_addr <= max_addr);
2252 		size_t avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
2253 		size_t max_frames = MIN(avail_frames, MAX_FRAMES);
2254 		if (max_frames == 0) {
2255 			error = KERN_RESOURCE_SHORTAGE;
2256 			goto error_exit;
2257 		}
2258 		struct _stackshot_backtrace_context ctx = {
2259 			.sbc_map = task->map,
2260 			.sbc_allow_faulting = stack_enable_faulting,
2261 			.sbc_prev_page = -1,
2262 			.sbc_prev_kva = -1,
2263 		};
2264 		struct backtrace_control ctl = {
2265 			.btc_user_thread = thread,
2266 			.btc_user_copy = _stackshot_backtrace_copy,
2267 			.btc_user_copy_context = &ctx,
2268 		};
2269 		struct backtrace_user_info info = BTUINFO_INIT;
2270 
2271 		saved_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
2272 		    &info);
2273 		if (saved_count > 0) {
2274 #if __LP64__
2275 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
2276 #else // __LP64__
2277 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
2278 #endif // !__LP64__
2279 			mach_vm_address_t out_addr_array;
2280 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
2281 			    STACKLR_WORDS, sizeof(uintptr_t), saved_count,
2282 			    &out_addr_array));
2283 			/*
2284 			 * Ensure the kcd_end_address (above) trick worked.
2285 			 */
2286 			assert(out_addr == out_addr_array);
2287 			if (info.btui_info & BTI_64_BIT) {
2288 				user_ths_ss_flags |= kUser64_p;
2289 			}
2290 			if ((info.btui_info & BTI_TRUNCATED) ||
2291 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
2292 				user_ths_ss_flags |= kThreadTruncatedBT;
2293 				user_ths_ss_flags |= kThreadTruncUserBT;
2294 			}
2295 			user_ths_ss_flags |= ctx.sbc_flags;
2296 			ctx.sbc_flags = 0;
2297 #if __LP64__
2298 			/* We only support async stacks on 64-bit kernels */
2299 			if (info.btui_async_frame_addr != 0) {
2300 				uint32_t async_start_offset = info.btui_async_start_index;
2301 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
2302 				    sizeof(async_start_offset), &async_start_offset));
2303 				out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2304 				assert(out_addr <= max_addr);
2305 
2306 				avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
2307 				max_frames = MIN(avail_frames, MAX_FRAMES);
2308 				if (max_frames == 0) {
2309 					error = KERN_RESOURCE_SHORTAGE;
2310 					goto error_exit;
2311 				}
2312 				ctl.btc_frame_addr = info.btui_async_frame_addr;
2313 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
2314 				info = BTUINFO_INIT;
2315 				unsigned int async_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
2316 				    &info);
2317 				if (async_count > 0) {
2318 					mach_vm_address_t async_out_addr;
2319 					kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
2320 					    STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64, sizeof(uintptr_t), async_count,
2321 					    &async_out_addr));
2322 					/*
2323 					 * Ensure the kcd_end_address (above) trick worked.
2324 					 */
2325 					assert(out_addr == async_out_addr);
2326 					if ((info.btui_info & BTI_TRUNCATED) ||
2327 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
2328 						user_ths_ss_flags |= kThreadTruncatedBT;
2329 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
2330 					}
2331 					user_ths_ss_flags |= ctx.sbc_flags;
2332 				}
2333 			}
2334 #endif /* _LP64 */
2335 		}
2336 		if (user_ths_ss_flags != 0) {
2337 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
2338 		}
2339 	}
2340 
2341 #if STACKSHOT_COLLECTS_LATENCY_INFO
2342 	latency_info.user_stack_latency = mach_absolute_time()  - latency_info.user_stack_latency;
2343 	latency_info.kernel_stack_latency = mach_absolute_time();
2344 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2345 
2346 	/* Call through to the machine specific trace routines
2347 	 * Frames are added past the snapshot header.
2348 	 */
2349 	if (thread->kernel_stack != 0) {
2350 		uint32_t kern_ths_ss_flags = 0;
2351 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2352 #if defined(__LP64__)
2353 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
2354 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
2355 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2356 		saved_count = machine_trace_thread64(
2357 #else
2358 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
2359 		extern int machine_trace_thread(thread_t thread, char *tracepos,
2360 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2361 		saved_count = machine_trace_thread(
2362 #endif
2363 			thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES,
2364 			&kern_ths_ss_flags);
2365 		if (saved_count > 0) {
2366 			int frame_size = sizeof(uintptr_t);
2367 #if defined(__LP64__)
2368 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
2369 #endif
2370 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, stack_kcdata_type,
2371 			    frame_size, saved_count / frame_size, &out_addr));
2372 		}
2373 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
2374 			kern_ths_ss_flags |= kThreadTruncKernBT;
2375 		}
2376 		if (kern_ths_ss_flags != 0) {
2377 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
2378 		}
2379 	}
2380 
2381 #if STACKSHOT_COLLECTS_LATENCY_INFO
2382 	latency_info.kernel_stack_latency = mach_absolute_time()  - latency_info.kernel_stack_latency;
2383 	latency_info.misc_latency = mach_absolute_time();
2384 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2385 
2386 #if CONFIG_THREAD_GROUPS
2387 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
2388 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
2389 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
2390 		stackshot_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
2391 	}
2392 #endif /* CONFIG_THREAD_GROUPS */
2393 
2394 	if (collect_iostats) {
2395 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
2396 	}
2397 
2398 #if MONOTONIC
2399 	if (collect_instrs_cycles) {
2400 		uint64_t instrs = 0, cycles = 0;
2401 		mt_stackshot_thread(thread, &instrs, &cycles);
2402 
2403 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
2404 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
2405 		    instrs_cycles->ics_instructions = instrs;
2406 		    instrs_cycles->ics_cycles = cycles;
2407 	}
2408 #endif /* MONOTONIC */
2409 
2410 #if STACKSHOT_COLLECTS_LATENCY_INFO
2411 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
2412 	if (collect_latency_info) {
2413 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
2414 	}
2415 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2416 
2417 error_exit:
2418 	return error;
2419 }
2420 
2421 static int
kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap,thread_t thread,boolean_t thread_on_core)2422 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
2423 {
2424 	cur_thread_snap->tds_thread_id = thread_tid(thread);
2425 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
2426 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
2427 	} else {
2428 		cur_thread_snap->tds_voucher_identifier = 0;
2429 	}
2430 
2431 	cur_thread_snap->tds_ss_flags = 0;
2432 	if (thread->effective_policy.thep_darwinbg) {
2433 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
2434 	}
2435 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
2436 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
2437 	}
2438 	if (thread->suspend_count > 0) {
2439 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
2440 	}
2441 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
2442 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
2443 	}
2444 	if (thread_on_core) {
2445 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
2446 	}
2447 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
2448 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
2449 	}
2450 
2451 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
2452 	cur_thread_snap->tds_state                   = thread->state;
2453 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
2454 	cur_thread_snap->tds_base_priority           = thread->base_pri;
2455 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
2456 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
2457 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
2458 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
2459 	    thread->requested_policy.thrp_qos_workq_override);
2460 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
2461 
2462 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
2463 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
2464 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
2465 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
2466 
2467 	return 0;
2468 }
2469 
/*
 * Number of unique IDs held in one struct saved_uniqueids batch.
 *
 * Why 12?  12 strikes a decent balance between allocating a large array on
 * the stack and having large kcdata item overheads for recording nonrunnable
 * tasks.
 */
#define UNIQUEIDSPERFLUSH 12
2476 
2477 struct saved_uniqueids {
2478 	uint64_t ids[UNIQUEIDSPERFLUSH];
2479 	unsigned count;
2480 };
2481 
/*
 * How a thread is to be recorded, as decided by classify_thread().
 */
enum thread_classification {
	/* take a full snapshot */
	tc_full_snapshot = 0,
	/* take a delta snapshot */
	tc_delta_snapshot = 1,
};
2486 
2487 static enum thread_classification
classify_thread(thread_t thread,boolean_t * thread_on_core_p,boolean_t collect_delta_stackshot)2488 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
2489 {
2490 	processor_t last_processor = thread->last_processor;
2491 
2492 	boolean_t thread_on_core =
2493 	    (last_processor != PROCESSOR_NULL &&
2494 	    (last_processor->state == PROCESSOR_SHUTDOWN || last_processor->state == PROCESSOR_RUNNING) &&
2495 	    last_processor->active_thread == thread);
2496 
2497 	*thread_on_core_p = thread_on_core;
2498 
2499 	/* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
2500 	 * previous full stackshot */
2501 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stack_snapshot_delta_since_timestamp)) {
2502 		return tc_full_snapshot;
2503 	} else {
2504 		return tc_delta_snapshot;
2505 	}
2506 }
2507 
/*
 * Per-capture parameters handed to kdp_stackshot_record_task().
 */
struct stackshot_context {
	/* pid of the only task to record, or -1 to record every task */
	int pid;
	/* STACKSHOT_* flags selecting what gets collected */
	uint64_t trace_flags;
};
2512 
2513 static kern_return_t
kdp_stackshot_record_task(struct stackshot_context * ctx,task_t task)2514 kdp_stackshot_record_task(struct stackshot_context *ctx, task_t task)
2515 {
2516 	boolean_t active_kthreads_only_p  = ((ctx->trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
2517 	boolean_t save_donating_pids_p    = ((ctx->trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
2518 	boolean_t collect_delta_stackshot = ((ctx->trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2519 	boolean_t save_owner_info         = ((ctx->trace_flags & STACKSHOT_THREAD_WAITINFO) != 0);
2520 
2521 	kern_return_t error = KERN_SUCCESS;
2522 	mach_vm_address_t out_addr = 0;
2523 	int saved_count = 0;
2524 
2525 	int task_pid                   = 0;
2526 	uint64_t task_uniqueid         = 0;
2527 	int num_delta_thread_snapshots = 0;
2528 	int num_waitinfo_threads       = 0;
2529 	int num_turnstileinfo_threads  = 0;
2530 
2531 	uint64_t task_start_abstime    = 0;
2532 	boolean_t have_map = FALSE, have_pmap = FALSE;
2533 	boolean_t some_thread_ran = FALSE;
2534 	unaligned_u64 task_snap_ss_flags = 0;
2535 
2536 #if STACKSHOT_COLLECTS_LATENCY_INFO
2537 	struct stackshot_latency_task latency_info;
2538 	latency_info.setup_latency = mach_absolute_time();
2539 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2540 
2541 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2542 	uint64_t task_begin_cpu_cycle_count = 0;
2543 	if (!panic_stackshot) {
2544 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2545 	}
2546 #endif
2547 
2548 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
2549 		error = KERN_FAILURE;
2550 		goto error_exit;
2551 	}
2552 
2553 	boolean_t task_in_teardown        = (task->bsd_info == NULL) || proc_in_teardown(task->bsd_info);// has P_LPEXIT set during proc_exit()
2554 	boolean_t task_in_transition      = task_in_teardown;         // here we can add other types of transition.
2555 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
2556 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
2557 
2558 	if (task_in_transition) {
2559 		collect_delta_stackshot = FALSE;
2560 	}
2561 
2562 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
2563 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
2564 
2565 	task_pid = pid_from_task(task);
2566 	/* Is returning -1 ok for terminating task ok ??? */
2567 	task_uniqueid = get_task_uniqueid(task);
2568 
2569 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
2570 		/*
2571 		 * Not interested in terminated tasks without threads.
2572 		 */
2573 		if (queue_empty(&task->threads) || task_pid == -1) {
2574 			return KERN_SUCCESS;
2575 		}
2576 	}
2577 
2578 	/* All PIDs should have the MSB unset */
2579 	assert((task_pid & (1ULL << 31)) == 0);
2580 
2581 #if STACKSHOT_COLLECTS_LATENCY_INFO
2582 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
2583 	latency_info.task_uniqueid = task_uniqueid;
2584 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2585 
2586 	/* Trace everything, unless a process was specified */
2587 	if ((ctx->pid == -1) || (ctx->pid == task_pid)) {
2588 		/* add task snapshot marker */
2589 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
2590 		    container_type, task_uniqueid));
2591 
2592 		if (collect_delta_stackshot) {
2593 			/*
2594 			 * For delta stackshots we need to know if a thread from this task has run since the
2595 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
2596 			 */
2597 			thread_t thread = THREAD_NULL;
2598 			queue_iterate(&task->threads, thread, thread_t, task_threads)
2599 			{
2600 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
2601 					error = KERN_FAILURE;
2602 					goto error_exit;
2603 				}
2604 
2605 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
2606 					continue;
2607 				}
2608 
2609 				boolean_t thread_on_core;
2610 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
2611 
2612 				switch (thread_classification) {
2613 				case tc_full_snapshot:
2614 					some_thread_ran = TRUE;
2615 					break;
2616 				case tc_delta_snapshot:
2617 					num_delta_thread_snapshots++;
2618 					break;
2619 				}
2620 			}
2621 		}
2622 
2623 		if (collect_delta_stackshot) {
2624 			proc_starttime_kdp(task->bsd_info, NULL, NULL, &task_start_abstime);
2625 		}
2626 
2627 		/* Next record any relevant UUID info and store the task snapshot */
2628 		if (task_in_transition ||
2629 		    !collect_delta_stackshot ||
2630 		    (task_start_abstime == 0) ||
2631 		    (task_start_abstime > stack_snapshot_delta_since_timestamp) ||
2632 		    some_thread_ran) {
2633 			/*
2634 			 * Collect full task information in these scenarios:
2635 			 *
2636 			 * 1) a full stackshot or the task is in transition
2637 			 * 2) a delta stackshot where the task started after the previous full stackshot
2638 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
2639 			 *
2640 			 * because the task may have exec'ed, changing its name, architecture, load info, etc
2641 			 */
2642 
2643 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
2644 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
2645 #if STACKSHOT_COLLECTS_LATENCY_INFO
2646 			if (!task_in_transition) {
2647 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags, &latency_info));
2648 			} else {
2649 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
2650 			}
2651 #else
2652 			if (!task_in_transition) {
2653 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
2654 			} else {
2655 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
2656 			}
2657 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2658 		} else {
2659 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
2660 		}
2661 
2662 #if STACKSHOT_COLLECTS_LATENCY_INFO
2663 		latency_info.misc_latency = mach_absolute_time();
2664 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2665 
2666 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
2667 		int current_delta_snapshot_index                  = 0;
2668 		if (num_delta_thread_snapshots > 0) {
2669 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
2670 			    sizeof(struct thread_delta_snapshot_v3),
2671 			    num_delta_thread_snapshots, &out_addr));
2672 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
2673 		}
2674 
2675 
2676 #if STACKSHOT_COLLECTS_LATENCY_INFO
2677 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
2678 #endif
2679 		/*
2680 		 * Iterate over the task threads to save thread snapshots and determine
2681 		 * how much space we need for waitinfo and turnstile info
2682 		 */
2683 		thread_t thread = THREAD_NULL;
2684 		queue_iterate(&task->threads, thread, thread_t, task_threads)
2685 		{
2686 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
2687 				error = KERN_FAILURE;
2688 				goto error_exit;
2689 			}
2690 
2691 			uint64_t thread_uniqueid;
2692 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
2693 				continue;
2694 			}
2695 			thread_uniqueid = thread_tid(thread);
2696 
2697 			boolean_t thread_on_core;
2698 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
2699 
2700 			switch (thread_classification) {
2701 			case tc_full_snapshot:
2702 				/* add thread marker */
2703 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
2704 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
2705 
2706 				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
2707 				kcdata_compression_window_open(stackshot_kcdata_p);
2708 
2709 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, ctx->trace_flags, have_pmap, thread_on_core));
2710 
2711 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2712 
2713 				/* mark end of thread snapshot data */
2714 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
2715 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
2716 				break;
2717 			case tc_delta_snapshot:
2718 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
2719 				break;
2720 			}
2721 
2722 			/*
2723 			 * We want to report owner information regardless of whether a thread
2724 			 * has changed since the last delta, whether it's a normal stackshot,
2725 			 * or whether it's nonrunnable
2726 			 */
2727 			if (save_owner_info) {
2728 				if (stackshot_thread_has_valid_waitinfo(thread)) {
2729 					num_waitinfo_threads++;
2730 				}
2731 
2732 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
2733 					num_turnstileinfo_threads++;
2734 				}
2735 			}
2736 		}
2737 #if STACKSHOT_COLLECTS_LATENCY_INFO
2738 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
2739 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2740 
2741 
2742 		thread_waitinfo_v2_t *thread_waitinfo           = NULL;
2743 		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
2744 		int current_waitinfo_index              = 0;
2745 		int current_turnstileinfo_index         = 0;
2746 		/* allocate space for the wait and turnstil info */
2747 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
2748 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
2749 			kcdata_compression_window_open(stackshot_kcdata_p);
2750 
2751 			if (num_waitinfo_threads > 0) {
2752 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
2753 				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
2754 				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
2755 			}
2756 
2757 			if (num_turnstileinfo_threads > 0) {
2758 				/* get space for the turnstile info */
2759 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
2760 				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
2761 				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
2762 			}
2763 
2764 			stackshot_plh_resetgen();  // so we know which portlabel_ids are referenced
2765 		}
2766 
2767 #if STACKSHOT_COLLECTS_LATENCY_INFO
2768 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
2769 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
2770 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2771 
2772 		/* Iterate over the task's threads to save the wait and turnstile info */
2773 		queue_iterate(&task->threads, thread, thread_t, task_threads)
2774 		{
2775 			uint64_t thread_uniqueid;
2776 
2777 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
2778 				continue;
2779 			}
2780 
2781 			thread_uniqueid = thread_tid(thread);
2782 
2783 			/* If we want owner info, we should capture it regardless of its classification */
2784 			if (save_owner_info) {
2785 				if (stackshot_thread_has_valid_waitinfo(thread)) {
2786 					stackshot_thread_wait_owner_info(
2787 						thread,
2788 						&thread_waitinfo[current_waitinfo_index++]);
2789 				}
2790 
2791 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
2792 					stackshot_thread_turnstileinfo(
2793 						thread,
2794 						&thread_turnstileinfo[current_turnstileinfo_index++]);
2795 				}
2796 			}
2797 		}
2798 
2799 #if STACKSHOT_COLLECTS_LATENCY_INFO
2800 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
2801 		latency_info.misc2_latency = mach_absolute_time();
2802 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2803 
2804 #if DEBUG || DEVELOPMENT
2805 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
2806 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
2807 			    num_delta_thread_snapshots, current_delta_snapshot_index);
2808 		}
2809 		if (current_waitinfo_index != num_waitinfo_threads) {
2810 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
2811 			    num_waitinfo_threads, current_waitinfo_index);
2812 		}
2813 #endif
2814 
2815 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
2816 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2817 			// now, record the portlabel hashes.
2818 			kcd_exit_on_error(kdp_stackshot_plh_record());
2819 		}
2820 
2821 #if IMPORTANCE_INHERITANCE
2822 		if (save_donating_pids_p) {
2823 			kcd_exit_on_error(
2824 				((((mach_vm_address_t)kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t))) <
2825 				(mach_vm_address_t)kcd_max_address(stackshot_kcdata_p))
2826 				? KERN_SUCCESS
2827 				: KERN_RESOURCE_SHORTAGE));
2828 			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
2829 			    (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
2830 			if (saved_count > 0) {
2831 				/* Variable size array - better not have it on the stack. */
2832 				kcdata_compression_window_open(stackshot_kcdata_p);
2833 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
2834 				    sizeof(int32_t), saved_count, &out_addr));
2835 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
2836 			}
2837 		}
2838 #endif
2839 
2840 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2841 		if (!panic_stackshot) {
2842 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
2843 			    "task_cpu_cycle_count"));
2844 		}
2845 #endif
2846 
2847 #if STACKSHOT_COLLECTS_LATENCY_INFO
2848 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
2849 		if (collect_latency_info) {
2850 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
2851 		}
2852 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2853 
2854 		/* mark end of task snapshot data */
2855 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
2856 		    task_uniqueid));
2857 	}
2858 
2859 
2860 error_exit:
2861 	return error;
2862 }
2863 
2864 
2865 static kern_return_t
kdp_stackshot_kcdata_format(int pid,uint64_t trace_flags,uint32_t * pBytesTraced,uint32_t * pBytesUncompressed)2866 kdp_stackshot_kcdata_format(int pid, uint64_t trace_flags, uint32_t * pBytesTraced, uint32_t * pBytesUncompressed)
2867 {
2868 	kern_return_t error        = KERN_SUCCESS;
2869 	mach_vm_address_t out_addr = 0;
2870 	uint64_t abs_time = 0, abs_time_end = 0;
2871 	uint64_t system_state_flags = 0;
2872 	task_t task = TASK_NULL;
2873 	mach_timebase_info_data_t timebase = {0, 0};
2874 	uint32_t length_to_copy = 0, tmp32 = 0;
2875 	abs_time = mach_absolute_time();
2876 	uint64_t last_task_start_time = 0;
2877 
2878 #if STACKSHOT_COLLECTS_LATENCY_INFO
2879 	struct stackshot_latency_collection latency_info;
2880 #endif
2881 
2882 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2883 	uint64_t stackshot_begin_cpu_cycle_count = 0;
2884 
2885 	if (!panic_stackshot) {
2886 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2887 	}
2888 #endif
2889 
2890 #if STACKSHOT_COLLECTS_LATENCY_INFO
2891 	collect_latency_info = trace_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
2892 #endif
2893 
2894 	/* process the flags */
2895 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2896 	boolean_t use_fault_path          = ((trace_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
2897 	stack_enable_faulting = (trace_flags & (STACKSHOT_ENABLE_BT_FAULTING));
2898 
2899 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
2900 	kc_format_t primary_kc_type = KCFormatUnknown;
2901 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
2902 		trace_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
2903 	}
2904 
2905 	struct stackshot_context ctx = {};
2906 	ctx.trace_flags = trace_flags;
2907 	ctx.pid = pid;
2908 
2909 	if (use_fault_path) {
2910 		fault_stats.sfs_pages_faulted_in = 0;
2911 		fault_stats.sfs_time_spent_faulting = 0;
2912 		fault_stats.sfs_stopped_faulting = (uint8_t) FALSE;
2913 	}
2914 
2915 	if (sizeof(void *) == 8) {
2916 		system_state_flags |= kKernel64_p;
2917 	}
2918 
2919 	if (stackshot_kcdata_p == NULL || pBytesTraced == NULL) {
2920 		error = KERN_INVALID_ARGUMENT;
2921 		goto error_exit;
2922 	}
2923 
2924 	_stackshot_validation_reset();
2925 	stackshot_plh_setup(stackshot_kcdata_p); /* set up port label hash */
2926 
2927 	/* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
2928 	clock_timebase_info(&timebase);
2929 
2930 	/* begin saving data into the buffer */
2931 	*pBytesTraced = 0;
2932 	if (pBytesUncompressed) {
2933 		*pBytesUncompressed = 0;
2934 	}
2935 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
2936 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
2937 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
2938 	if (trace_flags & STACKSHOT_PAGE_TABLES) {
2939 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stack_snapshot_pagetable_mask, "stackshot_pagetable_mask"));
2940 	}
2941 	if (stackshot_initial_estimate != 0) {
2942 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
2943 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
2944 	}
2945 
2946 #if STACKSHOT_COLLECTS_LATENCY_INFO
2947 	latency_info.setup_latency = mach_absolute_time();
2948 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2949 
2950 #if CONFIG_JETSAM
2951 	tmp32 = memorystatus_get_pressure_status_kdp();
2952 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
2953 #endif
2954 
2955 	if (!collect_delta_stackshot) {
2956 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
2957 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
2958 
2959 		tmp32 = PAGE_SIZE;
2960 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
2961 
2962 		/* save boot-args and osversion string */
2963 		length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
2964 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
2965 
2966 
2967 		length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
2968 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
2969 
2970 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
2971 	} else {
2972 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stack_snapshot_delta_since_timestamp));
2973 	}
2974 
2975 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
2976 
2977 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_microsecs));
2978 
2979 	/* record system level shared cache load info (if available) */
2980 	if (!collect_delta_stackshot && primary_system_shared_region &&
2981 	    _stackshot_validate_kva((vm_offset_t)primary_system_shared_region, sizeof(struct vm_shared_region))) {
2982 		struct dyld_shared_cache_loadinfo sys_shared_cache_info = {0};
2983 
2984 		/*
2985 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
2986 		 * naming of both the structure and fields for this use isn't great.  The
2987 		 * dyld_shared_cache_loadinfo structure has better names, but the same
2988 		 * layout and content as the original.
2989 		 *
2990 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
2991 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
2992 		 * entries; here, it's the slid base address, and we leave it that way
2993 		 * for backwards compatibility.
2994 		 */
2995 		stackshot_memcpy(sys_shared_cache_info.sharedCacheUUID, &primary_system_shared_region->sr_uuid, sizeof(primary_system_shared_region->sr_uuid));
2996 		sys_shared_cache_info.sharedCacheSlide =
2997 		    primary_system_shared_region->sr_slide;
2998 		sys_shared_cache_info.sharedCacheUnreliableSlidBaseAddress =
2999 		    primary_system_shared_region->sr_slide + primary_system_shared_region->sr_base_address;
3000 		sys_shared_cache_info.sharedCacheSlidFirstMapping =
3001 		    primary_system_shared_region->sr_base_address + primary_system_shared_region->sr_first_mapping;
3002 
3003 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
3004 		    sizeof(sys_shared_cache_info), &sys_shared_cache_info));
3005 
3006 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
3007 			/*
3008 			 * Include a map of the system shared cache layout if it has been populated
3009 			 * (which is only when the system is using a custom shared cache).
3010 			 */
3011 			if (primary_system_shared_region->sr_images && _stackshot_validate_kva((vm_offset_t)primary_system_shared_region->sr_images,
3012 			    (primary_system_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
3013 				assert(primary_system_shared_region->sr_images_count != 0);
3014 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), primary_system_shared_region->sr_images_count, primary_system_shared_region->sr_images));
3015 			}
3016 		}
3017 	}
3018 
3019 	/* Add requested information first */
3020 	if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
3021 		struct mem_and_io_snapshot mais = {0};
3022 		kdp_mem_and_io_snapshot(&mais);
3023 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
3024 	}
3025 
3026 #if CONFIG_THREAD_GROUPS
3027 	struct thread_group_snapshot_v3 *thread_groups = NULL;
3028 	int num_thread_groups = 0;
3029 
3030 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
3031 	uint64_t thread_group_begin_cpu_cycle_count = 0;
3032 
3033 	if (!panic_stackshot && (trace_flags & STACKSHOT_THREAD_GROUP)) {
3034 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3035 	}
3036 #endif
3037 
3038 
3039 	/* Iterate over thread group names */
3040 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
3041 		/* Variable size array - better not have it on the stack. */
3042 		kcdata_compression_window_open(stackshot_kcdata_p);
3043 
3044 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
3045 			trace_flags &= ~(STACKSHOT_THREAD_GROUP);
3046 		}
3047 
3048 		if (num_thread_groups > 0) {
3049 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
3050 			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
3051 		}
3052 
3053 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
3054 			error = KERN_FAILURE;
3055 			goto error_exit;
3056 		}
3057 
3058 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3059 	}
3060 
3061 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
3062 	if (!panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
3063 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
3064 		    "thread_groups_cpu_cycle_count"));
3065 	}
3066 #endif
3067 #else
3068 	trace_flags &= ~(STACKSHOT_THREAD_GROUP);
3069 #endif /* CONFIG_THREAD_GROUPS */
3070 
3071 
3072 #if STACKSHOT_COLLECTS_LATENCY_INFO
3073 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
3074 	latency_info.total_task_iteration_latency = mach_absolute_time();
3075 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3076 
3077 	/* Iterate over tasks */
3078 	queue_iterate(&tasks, task, task_t, tasks)
3079 	{
3080 		if (collect_delta_stackshot) {
3081 			uint64_t abstime;
3082 			proc_starttime_kdp(task->bsd_info, NULL, NULL, &abstime);
3083 
3084 			if (abstime > last_task_start_time) {
3085 				last_task_start_time = abstime;
3086 			}
3087 		}
3088 
3089 		error = kdp_stackshot_record_task(&ctx, task);
3090 		if (error) {
3091 			goto error_exit;
3092 		}
3093 	}
3094 
3095 
3096 #if STACKSHOT_COLLECTS_LATENCY_INFO
3097 	latency_info.total_task_iteration_latency = mach_absolute_time() - latency_info.total_task_iteration_latency;
3098 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3099 
3100 #if CONFIG_COALITIONS
3101 	/* Don't collect jetsam coalition data in delta stackshots - these don't change */
3102 	if (!collect_delta_stackshot || (last_task_start_time > stack_snapshot_delta_since_timestamp)) {
3103 		int num_coalitions = 0;
3104 		struct jetsam_coalition_snapshot *coalitions = NULL;
3105 
3106 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
3107 		uint64_t coalition_begin_cpu_cycle_count = 0;
3108 
3109 		if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
3110 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3111 		}
3112 #endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
3113 
3114 		/* Iterate over coalitions */
3115 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
3116 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
3117 				trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
3118 			}
3119 		}
3120 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
3121 			if (num_coalitions > 0) {
3122 				/* Variable size array - better not have it on the stack. */
3123 				kcdata_compression_window_open(stackshot_kcdata_p);
3124 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
3125 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
3126 
3127 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
3128 					error = KERN_FAILURE;
3129 					goto error_exit;
3130 				}
3131 
3132 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3133 			}
3134 		}
3135 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
3136 		if (!panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
3137 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
3138 			    "coalitions_cpu_cycle_count"));
3139 		}
3140 #endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
3141 	}
3142 #else
3143 	trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
3144 #endif /* CONFIG_COALITIONS */
3145 
3146 #if STACKSHOT_COLLECTS_LATENCY_INFO
3147 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time();
3148 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3149 
3150 	/*
3151 	 * Iterate over the tasks in the terminated tasks list. We only inspect
3152 	 * tasks that have a valid bsd_info pointer. The check for task transition
3153 	 * like past P_LPEXIT during proc_exit() is now checked for inside the
3154 	 * kdp_stackshot_record_task(), and then a safer and minimal
3155 	 * transitioning_task_snapshot struct is collected via
3156 	 * kcdata_record_transitioning_task_snapshot()
3157 	 */
3158 	queue_iterate(&terminated_tasks, task, task_t, tasks)
3159 	{
3160 		error = kdp_stackshot_record_task(&ctx, task);
3161 		if (error) {
3162 			goto error_exit;
3163 		}
3164 	}
3165 #if DEVELOPMENT || DEBUG
3166 	kcd_exit_on_error(kdp_stackshot_plh_stats());
3167 #endif /* DEVELOPMENT || DEBUG */
3168 
3169 #if STACKSHOT_COLLECTS_LATENCY_INFO
3170 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time() - latency_info.total_terminated_task_iteration_latency;
3171 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3172 
3173 	if (use_fault_path) {
3174 		kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
3175 		    sizeof(struct stackshot_fault_stats), &fault_stats);
3176 	}
3177 
3178 #if STACKSHOT_COLLECTS_LATENCY_INFO
3179 	if (collect_latency_info) {
3180 		latency_info.latency_version = 1;
3181 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO, sizeof(latency_info), &latency_info));
3182 	}
3183 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3184 
3185 	/* update timestamp of the stackshot */
3186 	abs_time_end = mach_absolute_time();
3187 	struct stackshot_duration_v2 stackshot_duration = {
3188 		.stackshot_duration         = (abs_time_end - abs_time),
3189 		.stackshot_duration_outer   = 0,
3190 		.stackshot_duration_prior   = stackshot_duration_prior_abs,
3191 	};
3192 
3193 	if ((trace_flags & STACKSHOT_DO_COMPRESS) == 0) {
3194 		kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
3195 		    sizeof(struct stackshot_duration_v2), &out_addr));
3196 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
3197 		stackshot_memcpy(duration_p, &stackshot_duration, sizeof(*duration_p));
3198 		stackshot_duration_outer                   = (unaligned_u64 *)&duration_p->stackshot_duration_outer;
3199 	} else {
3200 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_duration), &stackshot_duration));
3201 		stackshot_duration_outer = NULL;
3202 	}
3203 
3204 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
3205 	if (!panic_stackshot) {
3206 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
3207 		    "stackshot_total_cpu_cycle_cnt"));
3208 	}
3209 #endif
3210 
3211 	kcd_finalize_compression(stackshot_kcdata_p);
3212 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_out_flags"));
3213 
3214 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_kcdata_p));
3215 
3216 	/*  === END of populating stackshot data === */
3217 
3218 	*pBytesTraced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_kcdata_p);
3219 	*pBytesUncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_kcdata_p);
3220 
3221 error_exit:;
3222 
3223 #if INTERRUPT_MASKED_DEBUG
3224 	bool disable_interrupts_masked_check = kern_feature_override(
3225 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
3226 	    (trace_flags & STACKSHOT_DO_COMPRESS) != 0;
3227 
3228 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
3229 	disable_interrupts_masked_check = true;
3230 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
3231 
3232 	if (disable_interrupts_masked_check) {
3233 		ml_spin_debug_clear_self();
3234 	}
3235 
3236 	if (!panic_stackshot && interrupt_masked_debug_mode) {
3237 		/*
3238 		 * Try to catch instances where stackshot takes too long BEFORE returning from
3239 		 * the debugger
3240 		 */
3241 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
3242 	}
3243 #endif /* INTERRUPT_MASKED_DEBUG */
3244 	stackshot_plh_reset();
3245 	stack_enable_faulting = FALSE;
3246 
3247 	return error;
3248 }
3249 
3250 static uint64_t
proc_was_throttled_from_task(task_t task)3251 proc_was_throttled_from_task(task_t task)
3252 {
3253 	uint64_t was_throttled = 0;
3254 
3255 	if (task->bsd_info) {
3256 		was_throttled = proc_was_throttled(task->bsd_info);
3257 	}
3258 
3259 	return was_throttled;
3260 }
3261 
3262 static uint64_t
proc_did_throttle_from_task(task_t task)3263 proc_did_throttle_from_task(task_t task)
3264 {
3265 	uint64_t did_throttle = 0;
3266 
3267 	if (task->bsd_info) {
3268 		did_throttle = proc_did_throttle(task->bsd_info);
3269 	}
3270 
3271 	return did_throttle;
3272 }
3273 
3274 static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot * memio_snap)3275 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
3276 {
3277 	unsigned int pages_reclaimed;
3278 	unsigned int pages_wanted;
3279 	kern_return_t kErr;
3280 
3281 	uint64_t compressions = 0;
3282 	uint64_t decompressions = 0;
3283 
3284 	compressions = counter_load(&vm_statistics_compressions);
3285 	decompressions = counter_load(&vm_statistics_decompressions);
3286 
3287 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
3288 	memio_snap->free_pages = vm_page_free_count;
3289 	memio_snap->active_pages = vm_page_active_count;
3290 	memio_snap->inactive_pages = vm_page_inactive_count;
3291 	memio_snap->purgeable_pages = vm_page_purgeable_count;
3292 	memio_snap->wired_pages = vm_page_wire_count;
3293 	memio_snap->speculative_pages = vm_page_speculative_count;
3294 	memio_snap->throttled_pages = vm_page_throttled_count;
3295 	memio_snap->busy_buffer_count = count_busy_buffers();
3296 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
3297 	memio_snap->compressions = (uint32_t)compressions;
3298 	memio_snap->decompressions = (uint32_t)decompressions;
3299 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
3300 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
3301 
3302 	if (!kErr) {
3303 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
3304 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
3305 		memio_snap->pages_wanted_reclaimed_valid = 1;
3306 	} else {
3307 		memio_snap->pages_wanted = 0;
3308 		memio_snap->pages_reclaimed = 0;
3309 		memio_snap->pages_wanted_reclaimed_valid = 0;
3310 	}
3311 }
3312 
/*
 * memcpy() replacement used when writing stackshot data.
 *
 * On arm/arm64, while panic_stackshot is set, the copy is performed one byte
 * at a time instead of calling memcpy(): the panic buffer is mapped as
 * device memory there and does not allow unaligned accesses. In every other
 * case this is a plain memcpy().
 */
void
stackshot_memcpy(void *dst, const void *src, size_t len)
{
#if defined(__arm__) || defined(__arm64__)
	if (panic_stackshot) {
		uint8_t *out = (uint8_t *)dst;
		const uint8_t *in = (const uint8_t *)src;

		while (len-- > 0) {
			*out++ = *in++;
		}
		return;
	}
#endif
	memcpy(dst, src, len);
}
3327 
3328 static long
_stackshot_strlen(const char * s,size_t maxlen)3329 _stackshot_strlen(const char *s, size_t maxlen)
3330 {
3331 	size_t len = 0;
3332 	for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
3333 		if (*s == 0) {
3334 			return len;
3335 		}
3336 		if (len >= maxlen) {
3337 			return -1;
3338 		}
3339 	}
3340 	return -1; /* failed before end of string */
3341 }
/*
 * strlcpy()-style copy that goes through stackshot_memcpy().
 *
 * Copies src into dst, truncating to maxlen - 1 characters plus a NUL when
 * src does not fit. Nothing is written when maxlen is 0. Always returns
 * strlen(src), so a return value >= maxlen means the copy was truncated.
 */
static size_t
_stackshot_strlcpy(char *dst, const char *src, size_t maxlen)
{
	const size_t srclen = strlen(src);

	if (maxlen == 0) {
		return srclen;
	}

	if (srclen < maxlen) {
		/* fits, terminator included */
		stackshot_memcpy(dst, src, srclen + 1);
		return srclen;
	}

	stackshot_memcpy(dst, src, maxlen - 1);
	dst[maxlen - 1] = '\0';

	return srclen;
}
3356 
3357 /*
3358  * Sets the appropriate page mask and size to use for dealing with pages --
3359  * it's important that this is a "min" of page size to account for both K16/U4
3360  * (Rosetta) and K4/U16 (armv7k) environments.
3361  */
3362 static inline size_t
_stackshot_get_page_size(vm_map_t map,size_t * effective_page_mask)3363 _stackshot_get_page_size(vm_map_t map, size_t *effective_page_mask)
3364 {
3365 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3366 		*effective_page_mask = VM_MAP_PAGE_MASK(map);
3367 		return VM_MAP_PAGE_SIZE(map);
3368 	} else {
3369 		*effective_page_mask = PAGE_MASK;
3370 		return PAGE_SIZE;
3371 	}
3372 }
3373 
3374 /*
3375  * Returns the physical address of the specified map:target address,
3376  * using the kdp fault path if requested and the page is not resident.
3377  */
3378 vm_offset_t
kdp_find_phys(vm_map_t map,vm_offset_t target_addr,boolean_t try_fault,uint32_t * kdp_fault_results)3379 kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results)
3380 {
3381 	vm_offset_t cur_phys_addr;
3382 
3383 	if (map == VM_MAP_NULL) {
3384 		return 0;
3385 	}
3386 
3387 	cur_phys_addr = kdp_vtophys(map->pmap, target_addr);
3388 	if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
3389 		if (!try_fault || fault_stats.sfs_stopped_faulting) {
3390 			if (kdp_fault_results) {
3391 				*kdp_fault_results |= KDP_FAULT_RESULT_PAGED_OUT;
3392 			}
3393 
3394 			return 0;
3395 		}
3396 
3397 		/*
3398 		 * The pmap doesn't have a valid page so we start at the top level
3399 		 * vm map and try a lightweight fault. Update fault path usage stats.
3400 		 */
3401 		uint64_t fault_start_time = mach_absolute_time();
3402 		size_t effective_page_mask;
3403 		(void)_stackshot_get_page_size(map, &effective_page_mask);
3404 
3405 		cur_phys_addr = kdp_lightweight_fault(map, (target_addr & ~effective_page_mask));
3406 		fault_stats.sfs_time_spent_faulting += (mach_absolute_time() - fault_start_time);
3407 
3408 		if ((fault_stats.sfs_time_spent_faulting >= fault_stats.sfs_system_max_fault_time) && !panic_stackshot) {
3409 			fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
3410 		}
3411 
3412 		cur_phys_addr += (target_addr & effective_page_mask);
3413 
3414 		if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
3415 			if (kdp_fault_results) {
3416 				*kdp_fault_results |= (KDP_FAULT_RESULT_TRIED_FAULT | KDP_FAULT_RESULT_PAGED_OUT);
3417 			}
3418 
3419 			return 0;
3420 		}
3421 
3422 		if (kdp_fault_results) {
3423 			*kdp_fault_results |= KDP_FAULT_RESULT_FAULTED_IN;
3424 		}
3425 
3426 		fault_stats.sfs_pages_faulted_in++;
3427 	} else {
3428 		/*
3429 		 * This check is done in kdp_lightweight_fault for the fault path.
3430 		 */
3431 		unsigned int cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
3432 
3433 		if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
3434 			return 0;
3435 		}
3436 	}
3437 
3438 	return cur_phys_addr;
3439 }
3440 
3441 boolean_t
kdp_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,uint32_t * kdp_fault_results)3442 kdp_copyin_word(
3443 	task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results)
3444 {
3445 	if (task_has_64Bit_addr(task)) {
3446 		return kdp_copyin(task->map, addr, result, sizeof(uint64_t), try_fault, kdp_fault_results);
3447 	} else {
3448 		uint32_t buf;
3449 		boolean_t r = kdp_copyin(task->map, addr, &buf, sizeof(uint32_t), try_fault, kdp_fault_results);
3450 		*result = buf;
3451 		return r;
3452 	}
3453 }
3454 
3455 static int
kdp_copyin_string_slowpath(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,uint32_t * kdp_fault_results)3456 kdp_copyin_string_slowpath(
3457 	task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results)
3458 {
3459 	int i;
3460 	uint64_t validated = 0, valid_from;
3461 	uint64_t phys_src, phys_dest;
3462 	vm_map_t map = task->map;
3463 	size_t effective_page_mask;
3464 	size_t effective_page_size = _stackshot_get_page_size(map, &effective_page_mask);
3465 
3466 	for (i = 0; i < buf_sz; i++) {
3467 		if (validated == 0) {
3468 			valid_from = i;
3469 			phys_src = kdp_find_phys(map, addr + i, try_fault, kdp_fault_results);
3470 			phys_dest = kvtophys((vm_offset_t)&buf[i]);
3471 			uint64_t src_rem = effective_page_size - (phys_src & effective_page_mask);
3472 			uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
3473 			if (phys_src && phys_dest) {
3474 				validated = MIN(src_rem, dst_rem);
3475 				if (validated) {
3476 					bcopy_phys(phys_src, phys_dest, 1);
3477 					validated--;
3478 				} else {
3479 					return 0;
3480 				}
3481 			} else {
3482 				return 0;
3483 			}
3484 		} else {
3485 			bcopy_phys(phys_src + (i - valid_from), phys_dest + (i - valid_from), 1);
3486 			validated--;
3487 		}
3488 
3489 		if (buf[i] == '\0') {
3490 			return i + 1;
3491 		}
3492 	}
3493 
3494 	/* ran out of space */
3495 	return -1;
3496 }
3497 
3498 int
kdp_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,uint32_t * kdp_fault_results)3499 kdp_copyin_string(
3500 	task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results)
3501 {
3502 	/* try to opportunistically copyin 32 bytes, most strings should fit */
3503 	char optbuffer[32];
3504 	boolean_t res;
3505 
3506 	bzero(optbuffer, sizeof(optbuffer));
3507 	res = kdp_copyin(task->map, addr, optbuffer, sizeof(optbuffer), try_fault, kdp_fault_results);
3508 	if (res == FALSE || strnlen(optbuffer, sizeof(optbuffer)) == sizeof(optbuffer)) {
3509 		/* try the slowpath */
3510 		return kdp_copyin_string_slowpath(task, addr, buf, buf_sz, try_fault, kdp_fault_results);
3511 	}
3512 
3513 	/* success */
3514 	return (int) strlcpy(buf, optbuffer, buf_sz) + 1;
3515 }
3516 
3517 boolean_t
kdp_copyin(vm_map_t map,uint64_t uaddr,void * dest,size_t size,boolean_t try_fault,uint32_t * kdp_fault_results)3518 kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_results)
3519 {
3520 	size_t rem = size;
3521 	char *kvaddr = dest;
3522 	size_t effective_page_mask;
3523 	size_t effective_page_size = _stackshot_get_page_size(map, &effective_page_mask);
3524 
3525 #if defined(__arm__) || defined(__arm64__)
3526 	/* Identify if destination buffer is in panic storage area */
3527 	if (panic_stackshot && ((vm_offset_t)dest >= gPanicBase) && ((vm_offset_t)dest < (gPanicBase + gPanicSize))) {
3528 		if (((vm_offset_t)dest + size) > (gPanicBase + gPanicSize)) {
3529 			return FALSE;
3530 		}
3531 	}
3532 #endif
3533 
3534 	while (rem) {
3535 		uint64_t phys_src = kdp_find_phys(map, uaddr, try_fault, kdp_fault_results);
3536 		uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
3537 		uint64_t src_rem = effective_page_size - (phys_src & effective_page_mask);
3538 		uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
3539 		size_t cur_size = (uint32_t) MIN(src_rem, dst_rem);
3540 		cur_size = MIN(cur_size, rem);
3541 
3542 		if (phys_src && phys_dest) {
3543 #if defined(__arm__) || defined(__arm64__)
3544 			/*
3545 			 * On arm devices the panic buffer is mapped as device memory and doesn't allow
3546 			 * unaligned accesses. To prevent these, we copy over bytes individually here.
3547 			 */
3548 			if (panic_stackshot) {
3549 				stackshot_memcpy(kvaddr, (const void *)phystokv(phys_src), cur_size);
3550 			} else
3551 #endif /* defined(__arm__) || defined(__arm64__) */
3552 			bcopy_phys(phys_src, phys_dest, cur_size);
3553 		} else {
3554 			break;
3555 		}
3556 
3557 		uaddr += cur_size;
3558 		kvaddr += cur_size;
3559 		rem -= cur_size;
3560 	}
3561 
3562 	return rem == 0;
3563 }
3564 
3565 kern_return_t
do_stackshot(void * context)3566 do_stackshot(void *context)
3567 {
3568 #pragma unused(context)
3569 	kdp_snapshot++;
3570 
3571 	stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid,
3572 	    stack_snapshot_flags,
3573 	    &stack_snapshot_bytes_traced,
3574 	    &stack_snapshot_bytes_uncompressed);
3575 
3576 	if (stack_snapshot_ret == KERN_SUCCESS) {
3577 		/* releases and zeros and kcdata_end_alloc()s done */
3578 		kcdata_finish(stackshot_kcdata_p);
3579 	}
3580 
3581 	kdp_snapshot--;
3582 	return stack_snapshot_ret;
3583 }
3584 
3585 boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)3586 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
3587 {
3588 	/* When the pthread kext puts a worker thread to sleep, it will
3589 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
3590 	 * struct. See parkit() in kern/kern_support.c in libpthread.
3591 	 */
3592 	return (thread->state & TH_WAIT) &&
3593 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
3594 }
3595 
3596 #if CONFIG_COALITIONS
3597 static void
stackshot_coalition_jetsam_count(void * arg,int i,coalition_t coal)3598 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
3599 {
3600 #pragma unused(i, coal)
3601 	unsigned int *coalition_count = (unsigned int*)arg;
3602 	(*coalition_count)++;
3603 }
3604 
3605 static void
stackshot_coalition_jetsam_snapshot(void * arg,int i,coalition_t coal)3606 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
3607 {
3608 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
3609 		return;
3610 	}
3611 
3612 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
3613 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
3614 	task_t leader = TASK_NULL;
3615 	jcs->jcs_id = coalition_id(coal);
3616 	jcs->jcs_flags = 0;
3617 	jcs->jcs_thread_group = 0;
3618 
3619 	if (coalition_term_requested(coal)) {
3620 		jcs->jcs_flags |= kCoalitionTermRequested;
3621 	}
3622 	if (coalition_is_terminated(coal)) {
3623 		jcs->jcs_flags |= kCoalitionTerminated;
3624 	}
3625 	if (coalition_is_reaped(coal)) {
3626 		jcs->jcs_flags |= kCoalitionReaped;
3627 	}
3628 	if (coalition_is_privileged(coal)) {
3629 		jcs->jcs_flags |= kCoalitionPrivileged;
3630 	}
3631 
3632 #if CONFIG_THREAD_GROUPS
3633 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
3634 	if (thread_group) {
3635 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
3636 	}
3637 #endif /* CONFIG_THREAD_GROUPS */
3638 
3639 	leader = kdp_coalition_get_leader(coal);
3640 	if (leader) {
3641 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
3642 	} else {
3643 		jcs->jcs_leader_task_uniqueid = 0;
3644 	}
3645 }
3646 #endif /* CONFIG_COALITIONS */
3647 
3648 #if CONFIG_THREAD_GROUPS
/*
 * Counting callback: increments the unsigned int that `arg` points to once
 * per invocation. The index and the thread group are ignored.
 */
static void
stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
{
#pragma unused(i, tg)
	unsigned int *counter = (unsigned int *)arg;

	*counter += 1;
}
3656 
3657 static void
stackshot_thread_group_snapshot(void * arg,int i,struct thread_group * tg)3658 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
3659 {
3660 	struct thread_group_snapshot_v3 *thread_groups = arg;
3661 	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
3662 	const char *name = thread_group_get_name(tg);
3663 	uint32_t flags = thread_group_get_flags(tg);
3664 	tgs->tgs_id = thread_group_get_id(tg);
3665 	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
3666 	stackshot_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
3667 	stackshot_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
3668 	    sizeof(tgs->tgs_name_cont));
3669 	tgs->tgs_flags = ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
3670 	    ((flags & THREAD_GROUP_FLAGS_UI_APP) ? kThreadGroupUIApp : 0);
3671 }
3672 #endif /* CONFIG_THREAD_GROUPS */
3673 
3674 /* Determine if a thread has waitinfo that stackshot can provide */
3675 static int
stackshot_thread_has_valid_waitinfo(thread_t thread)3676 stackshot_thread_has_valid_waitinfo(thread_t thread)
3677 {
3678 	if (!(thread->state & TH_WAIT)) {
3679 		return 0;
3680 	}
3681 
3682 	switch (thread->block_hint) {
3683 	// If set to None or is a parked work queue, ignore it
3684 	case kThreadWaitParkedWorkQueue:
3685 	case kThreadWaitNone:
3686 		return 0;
3687 	// There is a short window where the pthread kext removes a thread
3688 	// from its ksyn wait queue before waking the thread up
3689 	case kThreadWaitPThreadMutex:
3690 	case kThreadWaitPThreadRWLockRead:
3691 	case kThreadWaitPThreadRWLockWrite:
3692 	case kThreadWaitPThreadCondVar:
3693 		return kdp_pthread_get_thread_kwq(thread) != NULL;
3694 	// All other cases are valid block hints if in a wait state
3695 	default:
3696 		return 1;
3697 	}
3698 }
3699 
3700 /* Determine if a thread has turnstileinfo that stackshot can provide */
3701 static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)3702 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
3703 {
3704 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
3705 
3706 	return stackshot_thread_has_valid_waitinfo(thread) &&
3707 	       ts != TURNSTILE_NULL;
3708 }
3709 
3710 static void
stackshot_thread_turnstileinfo(thread_t thread,thread_turnstileinfo_v2_t * tsinfo)3711 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
3712 {
3713 	struct turnstile *ts;
3714 	struct ipc_service_port_label *ispl = NULL;
3715 
3716 	/* acquire turnstile information and store it in the stackshot */
3717 	ts = thread_get_waiting_turnstile(thread);
3718 	tsinfo->waiter = thread_tid(thread);
3719 	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
3720 	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
3721 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
3722 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
3723 	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
3724 }
3725 
3726 static void
stackshot_thread_wait_owner_info(thread_t thread,thread_waitinfo_v2_t * waitinfo)3727 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
3728 {
3729 	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
3730 	struct ipc_service_port_label *ispl = NULL;
3731 
3732 	waitinfo->waiter        = thread_tid(thread);
3733 	waitinfo->wait_type     = thread->block_hint;
3734 	waitinfo->wait_flags    = 0;
3735 
3736 	switch (waitinfo->wait_type) {
3737 	case kThreadWaitKernelMutex:
3738 		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3739 		break;
3740 	case kThreadWaitPortReceive:
3741 		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
3742 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
3743 		break;
3744 	case kThreadWaitPortSend:
3745 		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
3746 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
3747 		break;
3748 	case kThreadWaitSemaphore:
3749 		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3750 		break;
3751 	case kThreadWaitUserLock:
3752 		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3753 		break;
3754 	case kThreadWaitKernelRWLockRead:
3755 	case kThreadWaitKernelRWLockWrite:
3756 	case kThreadWaitKernelRWLockUpgrade:
3757 		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3758 		break;
3759 	case kThreadWaitPThreadMutex:
3760 	case kThreadWaitPThreadRWLockRead:
3761 	case kThreadWaitPThreadRWLockWrite:
3762 	case kThreadWaitPThreadCondVar:
3763 		kdp_pthread_find_owner(thread, waitinfo_v1);
3764 		break;
3765 	case kThreadWaitWorkloopSyncWait:
3766 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
3767 		break;
3768 	case kThreadWaitOnProcess:
3769 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
3770 		break;
3771 	case kThreadWaitSleepWithInheritor:
3772 		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3773 		break;
3774 	case kThreadWaitEventlink:
3775 		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
3776 		break;
3777 	case kThreadWaitCompressor:
3778 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
3779 		break;
3780 	default:
3781 		waitinfo->owner = 0;
3782 		waitinfo->context = 0;
3783 		break;
3784 	}
3785 }
3786