xref: /xnu-10063.121.3/osfmk/kern/kern_stackshot.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
/*
 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/mach_vm.h>
#include <mach/clock_types.h>
#include <sys/code_signing.h>
#include <sys/errno.h>
#include <sys/stackshot.h>
#ifdef IMPORTANCE_INHERITANCE
#include <ipc/ipc_importance.h>
#endif
#include <sys/appleapiopts.h>
#include <kern/debug.h>
#include <kern/block_hint.h>
#include <uuid/uuid.h>

#include <kdp/kdp_dyld.h>
#include <kdp/kdp_en_debugger.h>
#include <kdp/processor_core.h>
#include <kdp/kdp_common.h>

#include <libsa/types.h>
#include <libkern/version.h>
#include <libkern/section_keywords.h>

#include <string.h> /* bcopy */

#include <kern/backtrace.h>
#include <kern/coalition.h>
#include <kern/exclaves_stackshot.h>
#include <kern/exclaves_inspection.h>
#include <kern/processor.h>
#include <kern/host_statistics.h>
#include <kern/counter.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/clock.h>
#include <kern/policy_internal.h>
#include <kern/socd_client.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_fault.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>
#include <libkern/OSKextLibPrivate.h>
#include <os/log.h>

#ifdef CONFIG_EXCLAVES
#include <kern/exclaves.tightbeam.h>
#endif /* CONFIG_EXCLAVES */

#include <kern/exclaves_test_stackshot.h>

#if defined(__x86_64__)
#include <i386/mp.h>
#include <i386/cpu_threads.h>
#endif

#include <pexpert/pexpert.h>

#if CONFIG_PERVASIVE_CPI
#include <kern/monotonic.h>
#endif /* CONFIG_PERVASIVE_CPI */

#include <san/kasan.h>

#if DEBUG || DEVELOPMENT
# define STACKSHOT_COLLECTS_LATENCY_INFO 1
#else
# define STACKSHOT_COLLECTS_LATENCY_INFO 0
#endif /* DEBUG || DEVELOPMENT */

extern unsigned int not_in_kdp;

/* indicate to the compiler that some accesses are unaligned */
typedef uint64_t unaligned_u64 __attribute__((aligned(1)));

int kdp_snapshot                            = 0;
static kern_return_t stack_snapshot_ret     = 0;
static uint32_t stack_snapshot_bytes_traced = 0;
static uint32_t stack_snapshot_bytes_uncompressed  = 0;

#if STACKSHOT_COLLECTS_LATENCY_INFO
static bool collect_latency_info = true;
#endif
static kcdata_descriptor_t stackshot_kcdata_p = NULL;
static void *stack_snapshot_buf;
static uint32_t stack_snapshot_bufsize;
int stack_snapshot_pid;
static uint64_t stack_snapshot_flags;
static uint64_t stackshot_out_flags;
static uint64_t stack_snapshot_delta_since_timestamp;
static uint32_t stack_snapshot_pagetable_mask;
static boolean_t panic_stackshot;

static boolean_t stack_enable_faulting = FALSE;
static struct stackshot_fault_stats fault_stats;

static uint64_t stackshot_last_abs_start;       /* start time of last stackshot */
static uint64_t stackshot_last_abs_end;         /* end time of last stackshot */
static uint64_t stackshots_taken;               /* total stackshots taken since boot */
static uint64_t stackshots_duration;            /* total abs time spent in stackshot_trap() since boot */

/*
 * Experimentally, our current estimates are 40% short 77% of the time; adding
 * 75% to the estimate gets us into 99%+ territory.  In the longer run, we need
 * to make stackshot estimates use a better approach (rdar://78880038); this is
 * intended to be a short-term fix.
 */
uint32_t stackshot_estimate_adj = 75; /* experiment factor: 0-100, adjust our estimate up by this amount */
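/*
 * Illustrative arithmetic (hypothetical numbers, not from this file): with
 * the default adjustment of 75, a base estimate of 1 MiB becomes
 * 1 MiB + (1 MiB * 75) / 100 = 1.75 MiB before page rounding; see
 * get_stackshot_estsize() below for the actual computation.
 */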

static uint32_t stackshot_initial_estimate;
static uint32_t stackshot_initial_estimate_adj;
static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
static unaligned_u64 * stackshot_duration_outer;
static uint64_t stackshot_microsecs;

void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
int kernel_stackshot_buf_size = 0;

void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */

#if CONFIG_EXCLAVES
static ctid_t *stackshot_exclave_inspect_ctids = NULL;
static size_t stackshot_exclave_inspect_ctid_count = 0;
static size_t stackshot_exclave_inspect_ctid_capacity = 0;

static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
#endif /* CONFIG_EXCLAVES */

#if DEBUG || DEVELOPMENT
TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
#else
const bool disable_exclave_stackshot = false;
#endif

__private_extern__ void stackshot_init( void );
static boolean_t memory_iszero(void *addr, size_t size);
uint32_t                get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj);
kern_return_t           kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
    size_t stackshot_config_size, boolean_t stackshot_from_user);
kern_return_t           do_stackshot(void *);
void                    kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags, kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask);
boolean_t               stackshot_thread_is_idle_worker_unsafe(thread_t thread);
static int              kdp_stackshot_kcdata_format(int pid, uint64_t * trace_flags);
uint32_t                kdp_stack_snapshot_bytes_traced(void);
uint32_t                kdp_stack_snapshot_bytes_uncompressed(void);
static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
static vm_offset_t      stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
static boolean_t        stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
static int              stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
static boolean_t        stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
static uint64_t         proc_was_throttled_from_task(task_t task);
static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);

#if CONFIG_COALITIONS
static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
#endif /* CONFIG_COALITIONS */

#if CONFIG_THREAD_GROUPS
static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
#endif /* CONFIG_THREAD_GROUPS */

extern uint32_t         workqueue_get_pwq_state_kdp(void *proc);

struct proc;
extern int              proc_pid(struct proc *p);
extern uint64_t         proc_uniqueid(void *p);
extern uint64_t         proc_was_throttled(void *p);
extern uint64_t         proc_did_throttle(void *p);
extern int              proc_exiting(void *p);
extern int              proc_in_teardown(void *p);
static uint64_t         proc_did_throttle_from_task(task_t task);
extern void             proc_name_kdp(struct proc *p, char * buf, int size);
extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
extern uint64_t         proc_getcsflags_kdp(void * p);
extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
extern int              memorystatus_get_pressure_status_kdp(void);
extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);

extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */

#if CONFIG_TELEMETRY
extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
#endif /* CONFIG_TELEMETRY */

extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);

static size_t stackshot_plh_est_size(void);

#if CONFIG_EXCLAVES
static kern_return_t collect_exclave_threads(uint64_t);
#endif

/*
 * Validates that the given address for a word is both a valid page and has
 * default caching attributes for the current map.
 */
bool machine_trace_thread_validate_kva(vm_offset_t);
/*
 * Validates a region that stackshot will potentially inspect.
 */
static bool _stackshot_validate_kva(vm_offset_t, size_t);
/*
 * Must be called whenever stackshot is re-driven.
 */
static void _stackshot_validation_reset(void);
/*
 * A kdp-safe strlen() call.  Returns:
 *      -1 if we reach maxlen or a bad address before the end of the string, or
 *      strlen(s)
 */
static long _stackshot_strlen(const char *s, size_t maxlen);

#define MAX_FRAMES 1000
#define MAX_LOADINFOS 500
#define MAX_DYLD_COMPACTINFO (20 * 1024)  // max bytes of compactinfo to include per proc/shared region
#define TASK_IMP_WALK_LIMIT 20

typedef struct thread_snapshot *thread_snapshot_t;
typedef struct task_snapshot *task_snapshot_t;

#if CONFIG_KDP_INTERACTIVE_DEBUGGING
extern kdp_send_t    kdp_en_send_pkt;
#endif

/*
 * Stackshot locking and other defines.
 */
static LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
static LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);

#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
#define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
#define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);

#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
#define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)

#define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
#define GIGABYTES (1024ULL * 1024ULL * 1024ULL)

SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;

/*
 * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
 * for non-panic stackshots where faulting is requested.
 */
#define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)

#define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
#define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */

#ifndef ROUNDUP
#define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
#endif

#define STACKSHOT_QUEUE_LABEL_MAXSIZE  64

#define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
#define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
/*
 * Use of the kcd_exit_on_error(action) macro requires a local
 * 'kern_return_t error' variable and 'error_exit' label.
 */
#define kcd_exit_on_error(action)                      \
	do {                                               \
	        if (KERN_SUCCESS != (error = (action))) {      \
	                if (error == KERN_RESOURCE_SHORTAGE) {     \
	                        error = KERN_INSUFFICIENT_BUFFER_SIZE; \
	                }                                          \
	                goto error_exit;                           \
	        }                                              \
	} while (0); /* end kcd_exit_on_error */
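
/*
 * A minimal sketch (illustrative, not part of this file) of the calling
 * convention the macro expects: the caller declares a local
 * 'kern_return_t error' and provides an 'error_exit' label.
 */
#if 0
static kern_return_t
kcd_exit_on_error_example(kcdata_descriptor_t kcd)
{
	kern_return_t error = KERN_SUCCESS;

	/* jumps to error_exit on failure, mapping KERN_RESOURCE_SHORTAGE
	 * to KERN_INSUFFICIENT_BUFFER_SIZE along the way */
	kcd_exit_on_error(kcdata_add_uint64_with_description(kcd, 0, "example"));
error_exit:
	return error;
}
#endif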

/*
 * Initialize the mutex governing access to the stack snapshot subsystem
 * and other stackshot related bits.
 */
__private_extern__ void
stackshot_init(void)
{
	mach_timebase_info_data_t timebase;

	clock_timebase_info(&timebase);
	fault_stats.sfs_system_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);

	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));

	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
}
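
/*
 * Worked example (hypothetical memory sizes): on a 6 GB device,
 * ROUNDUP(max_mem, GIGABYTES) / GIGABYTES == 6, so the scaled value is
 * 6 MiB and the 8 MiB SANE_TRACEBUF_SIZE floor prevails; on a 64 GB
 * machine the cap grows to 64 MiB.  The stackshot_maxsz boot-arg, when
 * present, overrides either result.
 */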

/*
 * Called with interrupts disabled after stackshot context has been
 * initialized. Updates stack_snapshot_ret.
 */
static kern_return_t
stackshot_trap(void)
{
	kern_return_t   rv;

#if defined(__x86_64__)
	/*
	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
	 * operation, it's essential to apply mutual exclusion to the other when one
	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
	 * capture CPUs.
	 *
	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
	 * allowed, so we check to ensure there is no rendezvous in progress before
	 * trying to grab the lock (if there is, a deadlock will occur when we try to
	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
	 * a subordinate of the call chain within the mp rendezvous action, this flag will
	 * be set and can be used to detect the inevitable deadlock that would occur
	 * if this thread tried to grab the rendezvous lock.
	 */

	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
		panic("Calling stackshot from a rendezvous is not allowed!");
	}

	mp_rendezvous_lock();
#endif

	stackshot_last_abs_start = mach_absolute_time();
	stackshot_last_abs_end = 0;

	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0);

	stackshot_last_abs_end = mach_absolute_time();
	stackshots_taken++;
	stackshots_duration += (stackshot_last_abs_end - stackshot_last_abs_start);

#if defined(__x86_64__)
	mp_rendezvous_unlock();
#endif
	return rv;
}

extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
void
stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
{
	STACKSHOT_SUBSYS_LOCK();
	*last_abs_start = stackshot_last_abs_start;
	*last_abs_end = stackshot_last_abs_end;
	*count = stackshots_taken;
	*total_duration = stackshots_duration;
	STACKSHOT_SUBSYS_UNLOCK();
}

static kern_return_t
finalize_kcdata(kcdata_descriptor_t kcdata)
{
	kern_return_t error = KERN_SUCCESS;

	kcd_finalize_compression(kcdata);
	kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_out_flags, "stackshot_out_flags"));
	kcd_exit_on_error(kcdata_write_buffer_end(kcdata));
	stack_snapshot_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(kcdata);
	stack_snapshot_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(kcdata);
	kcdata_finish(kcdata);
error_exit:
	return error;
}

kern_return_t
stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
{
	kern_return_t error = KERN_SUCCESS;
	boolean_t istate;

#if DEVELOPMENT || DEBUG
	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
		return KERN_NOT_SUPPORTED;
	}
#endif
	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* cap an individual stackshot to max_tracebuf_size */
	if (size > max_tracebuf_size) {
		size = max_tracebuf_size;
	}

	/* Serialize tracing */
	if (flags & STACKSHOT_TRYLOCK) {
		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
			return KERN_LOCK_OWNED;
		}
	} else {
		STACKSHOT_SUBSYS_LOCK();
	}

#if CONFIG_EXCLAVES
	assert(!stackshot_exclave_inspect_ctids);
#endif

	struct kcdata_descriptor kcdata;
	uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ?
	    KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;

	error = kcdata_memory_static_init(&kcdata, (mach_vm_address_t)buf, hdr_tag, size,
	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
	if (error) {
		goto out;
	}

	stackshot_initial_estimate = 0;
	stackshot_duration_prior_abs = 0;
	stackshot_duration_outer = NULL;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
	    flags, size, pid, delta_since_timestamp);

	istate = ml_set_interrupts_enabled(FALSE);
	uint64_t time_start      = mach_absolute_time();

	/* Emit a SOCD tracepoint that we are initiating a stackshot */
	SOCD_TRACE_XNU_START(STACKSHOT);

	/* Preload trace parameters */
	kdp_snapshot_preflight(pid, buf, size, flags, &kcdata,
	    delta_since_timestamp, pagetable_mask);

	/*
	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
	 * the trace buffer
	 */
	error = stackshot_trap();

	uint64_t time_end = mach_absolute_time();

	/* Emit a SOCD tracepoint that we have completed the stackshot */
	SOCD_TRACE_XNU_END(STACKSHOT);

	ml_set_interrupts_enabled(istate);

#if CONFIG_EXCLAVES
	/* stackshot trap should only finish successfully or with no pending Exclave threads */
	assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
	if (stackshot_exclave_inspect_ctids) {
		error = collect_exclave_threads(flags);
	}
#endif /* CONFIG_EXCLAVES */
	if (error == KERN_SUCCESS) {
		error = finalize_kcdata(stackshot_kcdata_p);
	}

	if (stackshot_duration_outer) {
		*stackshot_duration_outer = time_end - time_start;
	}
	*bytes_traced = kdp_stack_snapshot_bytes_traced();

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
	    error, (time_end - time_start), size, *bytes_traced);
out:

	stackshot_kcdata_p = NULL;
	STACKSHOT_SUBSYS_UNLOCK();
	return error;
}
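
/*
 * Illustrative sketch of an in-kernel caller (hypothetical helper; the
 * buffer size and flag choice are example values, not mandated by this
 * API): callers supply their own buffer and receive the number of bytes
 * traced on success.
 */
#if 0
static kern_return_t
example_kernel_stackshot(void *buf, uint32_t bufsize)
{
	unsigned bytes = 0;
	kern_return_t kr;

	/* pid -1 == all tasks; no delta timestamp; no pagetable mask */
	kr = stack_snapshot_from_kernel(-1, buf, bufsize,
	    STACKSHOT_KCDATA_FORMAT | STACKSHOT_TRYLOCK, 0, 0, &bytes);
	if (kr == KERN_SUCCESS) {
		/* 'bytes' bytes of kcdata are now available in 'buf' */
	}
	return kr;
}
#endif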
504 
505 #if CONFIG_TELEMETRY
506 kern_return_t
stack_microstackshot(user_addr_t tracebuf,uint32_t tracebuf_size,uint32_t flags,int32_t * retval)507 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
508 {
509 	int error = KERN_SUCCESS;
510 	uint32_t bytes_traced = 0;
511 
512 	*retval = -1;
513 
514 	/*
515 	 * Control related operations
516 	 */
517 	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
518 		telemetry_global_ctl(1);
519 		*retval = 0;
520 		goto exit;
521 	} else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
522 		telemetry_global_ctl(0);
523 		*retval = 0;
524 		goto exit;
525 	}
526 
527 	/*
528 	 * Data related operations
529 	 */
530 	*retval = -1;
531 
532 	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
533 		error = KERN_INVALID_ARGUMENT;
534 		goto exit;
535 	}
536 
537 	STACKSHOT_SUBSYS_LOCK();
538 
539 	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
540 		if (tracebuf_size > max_tracebuf_size) {
541 			error = KERN_INVALID_ARGUMENT;
542 			goto unlock_exit;
543 		}
544 
545 		bytes_traced = tracebuf_size;
546 		error = telemetry_gather(tracebuf, &bytes_traced,
547 		    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
548 		*retval = (int)bytes_traced;
549 		goto unlock_exit;
550 	}
551 
552 unlock_exit:
553 	STACKSHOT_SUBSYS_UNLOCK();
554 exit:
555 	return error;
556 }
557 #endif /* CONFIG_TELEMETRY */
558 
559 /*
560  * Return the estimated size of a stackshot based on the
561  * number of currently running threads and tasks.
562  *
563  * adj is an adjustment in units of percentage
564  *
565  * This function is mostly unhinged from reality; struct thread_snapshot and
566  * struct task_stackshot are legacy, much larger versions of the structures we
567  * actually use, and there's no accounting for how we actually generate
568  * task & thread information.  rdar://78880038 intends to replace this all.
569  */
570 uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj)571 get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj)
572 {
573 	vm_size_t thread_total;
574 	vm_size_t task_total;
575 	uint64_t size;
576 	uint32_t estimated_size;
577 	size_t est_thread_size = sizeof(struct thread_snapshot);
578 	size_t est_task_size = sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE;
579 
580 	adj = MIN(adj, 100u);   /* no more than double our estimate */
581 
582 #if STACKSHOT_COLLECTS_LATENCY_INFO
583 	if (collect_latency_info) {
584 		est_thread_size += sizeof(struct stackshot_latency_thread);
585 		est_task_size += sizeof(struct stackshot_latency_task);
586 	}
587 #endif
588 
589 	thread_total = (threads_count * est_thread_size);
590 	task_total = (tasks_count  * est_task_size);
591 
592 	size = thread_total + task_total + STACKSHOT_SUPP_SIZE;                 /* estimate */
593 	size += (size * adj) / 100;                                                                     /* add adj */
594 	size = MAX(size, prev_size_hint);                                                               /* allow hint to increase */
595 	size += stackshot_plh_est_size(); /* add space for the port label hash */
596 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK));             /* avoid overflow */
597 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
598 
599 	return estimated_size;
600 }
601 
602 /*
603  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
604  *				into the current task's user space and subsequently copy out the address
605  *				at which the buffer has been mapped in user space to out_buffer_addr.
606  *
607  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
608  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
609  *				out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
610  *				out_size_addr - pointer to be filled in with the size of the buffer
611  *
612  * Outputs:			ENOSPC if there is not enough free space in the task's address space to remap the buffer
613  *				EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
614  *				an error from copyout
615  */
616 static kern_return_t
stackshot_remap_buffer(void * stackshotbuf,uint32_t bytes_traced,uint64_t out_buffer_addr,uint64_t out_size_addr)617 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
618 {
619 	int                     error = 0;
620 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
621 	vm_prot_t               cur_prot, max_prot;
622 
623 	error = mach_vm_remap_kernel(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
624 	    VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
625 	/*
626 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
627 	 */
628 	if (error == KERN_SUCCESS) {
629 		/*
630 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
631 		 * we just made in the task's user space.
632 		 */
633 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
634 		if (error != KERN_SUCCESS) {
635 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
636 			return error;
637 		}
638 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
639 		if (error != KERN_SUCCESS) {
640 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
641 			return error;
642 		}
643 	}
644 	return error;
645 }
646 
647 #if CONFIG_EXCLAVES
648 
649 static kern_return_t
stackshot_setup_exclave_waitlist(kcdata_descriptor_t kcdata)650 stackshot_setup_exclave_waitlist(kcdata_descriptor_t kcdata)
651 {
652 	kern_return_t error = KERN_SUCCESS;
653 	size_t exclave_threads_max = exclaves_ipc_buffer_count();
654 	size_t waitlist_size = 0;
655 
656 	assert(!stackshot_exclave_inspect_ctids);
657 
658 	if (exclaves_inspection_is_initialized() && exclave_threads_max) {
659 		if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
660 			error = KERN_INVALID_ARGUMENT;
661 			goto error;
662 		}
663 		stackshot_exclave_inspect_ctids = kcdata_endalloc(kcdata, waitlist_size);
664 		if (!stackshot_exclave_inspect_ctids) {
665 			error = KERN_RESOURCE_SHORTAGE;
666 			goto error;
667 		}
668 		stackshot_exclave_inspect_ctid_count = 0;
669 		stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
670 	}
671 
672 error:
673 	return error;
674 }
675 
676 static kern_return_t
collect_exclave_threads(uint64_t stackshot_flags)677 collect_exclave_threads(uint64_t stackshot_flags)
678 {
679 	size_t i;
680 	ctid_t ctid;
681 	thread_t thread;
682 	kern_return_t kr = KERN_SUCCESS;
683 	STACKSHOT_SUBSYS_ASSERT_LOCKED();
684 
685 	lck_mtx_lock(&exclaves_collect_mtx);
686 
687 	if (stackshot_exclave_inspect_ctid_count == 0) {
688 		/* Nothing to do */
689 		goto out;
690 	}
691 
692 	// When asking for ASIDs, make sure we get all exclaves asids and mappings as well
693 	exclaves_stackshot_raw_addresses = (stackshot_flags & STACKSHOT_ASID);
694 	exclaves_stackshot_all_address_spaces = (stackshot_flags & STACKSHOT_ASID);
695 
696 	/* This error is intentionally ignored: we are now committed to collecting
697 	 * these threads, or at least properly waking them. If this fails, the first
698 	 * collected thread should also fail to append to the kcdata, and will abort
699 	 * further collection, properly clearing the AST and waking these threads.
700 	 */
701 	kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
702 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
703 
704 	for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
705 		ctid = stackshot_exclave_inspect_ctids[i];
706 		thread = ctid_get_thread(ctid);
707 		assert(thread);
708 		exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
709 	}
710 	exclaves_inspection_begin_collecting();
711 	exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
712 	kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
713 	if (kr != KERN_SUCCESS) {
714 		goto out;
715 	}
716 
717 	kr = kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
718 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
719 	if (kr != KERN_SUCCESS) {
720 		goto out;
721 	}
722 out:
723 	/* clear Exclave buffer now that it's been used */
724 	stackshot_exclave_inspect_ctids = NULL;
725 	stackshot_exclave_inspect_ctid_capacity = 0;
726 	stackshot_exclave_inspect_ctid_count = 0;
727 
728 	lck_mtx_unlock(&exclaves_collect_mtx);
729 	return kr;
730 }
731 
732 static kern_return_t
stackshot_exclaves_process_stacktrace(const address_v__opt_s * _Nonnull st,void * kcdata_ptr)733 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
734 {
735 	kern_return_t error = KERN_SUCCESS;
736 	exclave_ecstackentry_addr_t * addr = NULL;
737 	__block size_t count = 0;
738 
739 	if (!st->has_value) {
740 		goto error_exit;
741 	}
742 
743 	address__v_visit(&st->value, ^(size_t __unused i, const stackshot_address_s __unused item) {
744 		count++;
745 	});
746 
747 	kcdata_compression_window_open(kcdata_ptr);
748 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
749 	    sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
750 
751 	address__v_visit(&st->value, ^(size_t i, const stackshot_address_s item) {
752 		addr[i] = (exclave_ecstackentry_addr_t)item;
753 	});
754 
755 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
756 
757 error_exit:
758 	return error;
759 }
760 
761 static kern_return_t
stackshot_exclaves_process_ipcstackentry(uint64_t index,const stackshot_ipcstackentry_s * _Nonnull ise,void * kcdata_ptr)762 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshot_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
763 {
764 	kern_return_t error = KERN_SUCCESS;
765 
766 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
767 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
768 
769 	struct exclave_ipcstackentry_info info = { 0 };
770 	info.eise_asid = ise->asid;
771 
772 	info.eise_tnid = ise->tnid;
773 
774 	if (ise->invocationid.has_value) {
775 		info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
776 		info.eise_invocationid = ise->invocationid.value;
777 	} else {
778 		info.eise_invocationid = 0;
779 	}
780 
781 	info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
782 
783 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
784 
785 	if (ise->stacktrace.has_value) {
786 		kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
787 	}
788 
789 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
790 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
791 
792 error_exit:
793 	return error;
794 }
795 
796 static kern_return_t
stackshot_exclaves_process_ipcstack(const stackshot_ipcstackentry_v__opt_s * _Nonnull ipcstack,void * kcdata_ptr)797 stackshot_exclaves_process_ipcstack(const stackshot_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
798 {
799 	__block kern_return_t kr = KERN_SUCCESS;
800 
801 	if (!ipcstack->has_value) {
802 		goto error_exit;
803 	}
804 
805 	stackshot_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshot_ipcstackentry_s *_Nonnull item) {
806 		if (kr == KERN_SUCCESS) {
807 		        kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
808 		}
809 	});
810 
811 error_exit:
812 	return kr;
813 }
814 
815 static kern_return_t
stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s * _Nonnull se,void * kcdata_ptr)816 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
817 {
818 	kern_return_t error = KERN_SUCCESS;
819 
820 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
821 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
822 
823 	struct exclave_scresult_info info = { 0 };
824 	info.esc_id = se->scid;
825 	info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
826 
827 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
828 
829 	if (se->ipcstack.has_value) {
830 		kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
831 	}
832 
833 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
834 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
835 
836 error_exit:
837 	return error;
838 }
839 
840 static kern_return_t
stackshot_exclaves_process_textlayout_segments(const stackshot_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)841 stackshot_exclaves_process_textlayout_segments(const stackshot_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
842 {
843 	kern_return_t error = KERN_SUCCESS;
844 	__block struct exclave_textlayout_segment * info = NULL;
845 
846 	__block size_t count = 0;
847 	stackshot_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshot_textsegment_s __unused *_Nonnull item) {
848 		count++;
849 	});
850 
851 	if (!count) {
852 		goto error_exit;
853 	}
854 
855 	kcdata_compression_window_open(kcdata_ptr);
856 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
857 	    sizeof(struct exclave_textlayout_segment), count, (mach_vm_address_t*)&info));
858 
859 	stackshot_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshot_textsegment_s *_Nonnull item) {
860 		memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
861 		if (want_raw_addresses) {
862 		        info->layoutSegment_loadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
863 		} else {
864 		        info->layoutSegment_loadAddress = item->loadaddress;
865 		}
866 		info++;
867 	});
868 
869 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
870 
871 error_exit:
872 	return error;
873 }
874 
875 static kern_return_t
stackshot_exclaves_process_textlayout(uint64_t index,const stackshot_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)876 stackshot_exclaves_process_textlayout(uint64_t index, const stackshot_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
877 {
878 	kern_return_t error = KERN_SUCCESS;
879 	__block struct exclave_textlayout_info info = { 0 };
880 
881 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
882 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
883 
884 	info.layout_id = tl->textlayoutid;
885 
886 	info.etl_flags = want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid;
887 
888 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
889 	kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
890 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
891 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
892 error_exit:
893 	return error;
894 }
895 
896 static kern_return_t
stackshot_exclaves_process_addressspace(const stackshot_addressspace_s * _Nonnull as,void * kcdata_ptr,bool want_raw_addresses)897 stackshot_exclaves_process_addressspace(const stackshot_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
898 {
899 	kern_return_t error = KERN_SUCCESS;
900 	struct exclave_addressspace_info info = { 0 };
901 	__block size_t name_len = 0;
902 	uint8_t * name = NULL;
903 
904 	u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
905 		name_len++;
906 	});
907 
908 	info.eas_id = as->asid;
909 
910 	if (want_raw_addresses && as->rawaddressslide.has_value) {
911 		info.eas_flags = kExclaveAddressSpaceHaveSlide;
912 		info.eas_slide = as->rawaddressslide.value;
913 	} else {
914 		info.eas_flags = 0;
915 		info.eas_slide = UINT64_MAX;
916 	}
917 
918 	info.eas_layoutid = as->textlayoutid; // text layout for this address space
919 	info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
920 
921 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
922 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
923 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
924 
925 	if (name_len > 0) {
926 		kcdata_compression_window_open(kcdata_ptr);
927 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
928 
929 		u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
930 			name[i] = item;
931 		});
932 		name[name_len] = 0;
933 
934 		kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
935 	}
936 
937 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
938 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
939 error_exit:
940 	return error;
941 }
942 
943 kern_return_t
944 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
945 
946 kern_return_t
stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s * result,void * kcdata_ptr,bool want_raw_addresses)947 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
948 {
949 	__block kern_return_t kr = KERN_SUCCESS;
950 
951 	stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
952 		if (kr == KERN_SUCCESS) {
953 		        kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
954 		}
955 	});
956 
957 	stackshot_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshot_addressspace_s *_Nonnull item) {
958 		if (kr == KERN_SUCCESS) {
959 		        kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
960 		}
961 	});
962 
963 	stackshot_textlayout__v_visit(&result->textlayouts, ^(size_t i, const stackshot_textlayout_s *_Nonnull item) {
964 		if (kr == KERN_SUCCESS) {
965 		        kr = stackshot_exclaves_process_textlayout(i, item, kcdata_ptr, want_raw_addresses);
966 		}
967 	});
968 
969 	return kr;
970 }
971 
972 kern_return_t
973 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
974 
975 kern_return_t
stackshot_exclaves_process_result(kern_return_t collect_kr,const stackshot_stackshotresult_s * result,bool want_raw_addresses)976 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
977 {
978 	kern_return_t kr = KERN_SUCCESS;
979 	if (result == NULL) {
980 		return collect_kr;
981 	}
982 
983 	kr = stackshot_exclaves_process_stackshot(result, stackshot_kcdata_p, want_raw_addresses);
984 
985 	stackshot_exclave_kr = kr;
986 
987 	return kr;
988 }
989 
990 
991 static void
commit_exclaves_ast(void)992 commit_exclaves_ast(void)
993 {
994 	size_t i = 0;
995 	thread_t thread = NULL;
996 
997 	assert(debug_mode_active());
998 
999 	if (stackshot_exclave_inspect_ctids && stackshot_exclave_inspect_ctid_count > 0) {
1000 		for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
1001 			thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
1002 			thread_reference(thread);
1003 			os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
1004 		}
1005 	}
1006 }
1007 
1008 #endif /* CONFIG_EXCLAVES */
1009 
1010 kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version,void * stackshot_config,size_t stackshot_config_size,boolean_t stackshot_from_user)1011 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
1012 {
1013 	int error = 0;
1014 	boolean_t prev_interrupt_state;
1015 	uint32_t bytes_traced = 0;
1016 	uint32_t stackshot_estimate = 0;
1017 	uint32_t stackshotbuf_size = 0;
1018 	void * stackshotbuf = NULL;
1019 	kcdata_descriptor_t kcdata_p = NULL;
1020 
1021 	void * buf_to_free = NULL;
1022 	int size_to_free = 0;
1023 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
1024 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
1025 
1026 	/* Parsed arguments */
1027 	uint64_t                out_buffer_addr;
1028 	uint64_t                out_size_addr;
1029 	int                     pid = -1;
1030 	uint64_t                flags;
1031 	uint64_t                since_timestamp;
1032 	uint32_t                size_hint = 0;
1033 	uint32_t                pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
1034 
1035 	if (stackshot_config == NULL) {
1036 		return KERN_INVALID_ARGUMENT;
1037 	}
1038 #if DEVELOPMENT || DEBUG
1039 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
1040 	 * configuration in lieu of the kernel feature override.
1041 	 */
1042 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1043 		return KERN_NOT_SUPPORTED;
1044 	}
1045 #endif
1046 
1047 	switch (stackshot_config_version) {
1048 	case STACKSHOT_CONFIG_TYPE:
1049 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
1050 			return KERN_INVALID_ARGUMENT;
1051 		}
1052 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
1053 		out_buffer_addr = config->sc_out_buffer_addr;
1054 		out_size_addr = config->sc_out_size_addr;
1055 		pid = config->sc_pid;
1056 		flags = config->sc_flags;
1057 		since_timestamp = config->sc_delta_timestamp;
1058 		if (config->sc_size <= max_tracebuf_size) {
1059 			size_hint = config->sc_size;
1060 		}
1061 		/*
1062 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
1063 		 * dump every level if the pagetable_mask is not set
1064 		 */
1065 		if (flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
1066 			pagetable_mask = config->sc_pagetable_mask;
1067 		}
1068 		break;
1069 	default:
1070 		return KERN_NOT_SUPPORTED;
1071 	}
1072 
1073 	/*
1074 	 * Currently saving a kernel buffer and trylock are only supported from the
1075 	 * internal/KEXT API.
1076 	 */
1077 	if (stackshot_from_user) {
1078 		if (flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
1079 			return KERN_NO_ACCESS;
1080 		}
1081 #if !DEVELOPMENT && !DEBUG
1082 		if (flags & (STACKSHOT_DO_COMPRESS)) {
1083 			return KERN_NO_ACCESS;
1084 		}
1085 #endif
1086 	} else {
1087 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
1088 			return KERN_NOT_SUPPORTED;
1089 		}
1090 	}
1091 
1092 	if (!((flags & STACKSHOT_KCDATA_FORMAT) || (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
1093 		return KERN_NOT_SUPPORTED;
1094 	}
1095 
1096 	/* Compresssed delta stackshots or page dumps are not yet supported */
1097 	if (((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (flags & STACKSHOT_PAGE_TABLES))
1098 	    && (flags & STACKSHOT_DO_COMPRESS)) {
1099 		return KERN_NOT_SUPPORTED;
1100 	}
1101 
1102 	/*
1103 	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
1104 	 */
1105 	if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
1106 		return KERN_INVALID_ARGUMENT;
1107 	}
1108 
1109 	if (since_timestamp != 0 && ((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
1110 		return KERN_INVALID_ARGUMENT;
1111 	}
1112 
1113 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
1114 	if (!mt_core_supported) {
1115 		flags &= ~STACKSHOT_INSTRS_CYCLES;
1116 	}
1117 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
1118 	flags &= ~STACKSHOT_INSTRS_CYCLES;
1119 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
1120 
1121 	STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
1122 	STACKSHOT_SUBSYS_LOCK();
1123 
1124 	if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
1125 		/*
1126 		 * Don't overwrite an existing stackshot
1127 		 */
1128 		if (kernel_stackshot_buf != NULL) {
1129 			error = KERN_MEMORY_PRESENT;
1130 			goto error_early_exit;
1131 		}
1132 	} else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
1133 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
1134 			error = KERN_NOT_IN_SET;
1135 			goto error_early_exit;
1136 		}
1137 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
1138 		    out_buffer_addr, out_size_addr);
1139 		/*
1140 		 * If we successfully remapped the buffer into the user's address space, we
1141 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
1142 		 * and then clear the kernel stackshot pointer and associated size.
1143 		 */
1144 		if (error == KERN_SUCCESS) {
1145 			buf_to_free = kernel_stackshot_buf;
1146 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
1147 			kernel_stackshot_buf = NULL;
1148 			kernel_stackshot_buf_size = 0;
1149 		}
1150 
1151 		goto error_early_exit;
1152 	}
1153 
1154 	if (flags & STACKSHOT_GET_BOOT_PROFILE) {
1155 		void *bootprofile = NULL;
1156 		uint32_t len = 0;
1157 #if CONFIG_TELEMETRY
1158 		bootprofile_get(&bootprofile, &len);
1159 #endif
1160 		if (!bootprofile || !len) {
1161 			error = KERN_NOT_IN_SET;
1162 			goto error_early_exit;
1163 		}
1164 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
1165 		goto error_early_exit;
1166 	}
1167 
1168 	stackshot_duration_prior_abs = 0;
1169 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
1170 	stackshotbuf_size = stackshot_estimate =
1171 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj);
1172 	stackshot_initial_estimate = stackshot_estimate;
1173 
1174 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
1175 	    flags, stackshotbuf_size, pid, since_timestamp);
1176 	is_traced = true;
1177 
1178 #if CONFIG_EXCLAVES
1179 	assert(!stackshot_exclave_inspect_ctids);
1180 #endif
1181 
1182 	for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
1183 		if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size,
1184 		    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
1185 			error = KERN_RESOURCE_SHORTAGE;
1186 			goto error_exit;
1187 		}
1188 
1189 
1190 		uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1191 		    : (flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1192 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1193 		kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, hdr_tag, stackshotbuf_size,
1194 		    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1195 
1196 		stackshot_duration_outer = NULL;
1197 
1198 		/* if compression was requested, allocate the extra zlib scratch area */
1199 		if (flags & STACKSHOT_DO_COMPRESS) {
1200 			hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1201 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1202 			error = kcdata_init_compress(kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB);
1203 			if (error != KERN_SUCCESS) {
1204 				os_log(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
1205 				    (int) error);
1206 				goto error_exit;
1207 			}
1208 		}
1209 
1210 		/*
1211 		 * Disable interrupts and save the current interrupt state.
1212 		 */
1213 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
1214 		uint64_t time_start      = mach_absolute_time();
1215 
1216 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
1217 		SOCD_TRACE_XNU_START(STACKSHOT);
1218 
1219 		/*
1220 		 * Load stackshot parameters.
1221 		 */
1222 		kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, since_timestamp,
1223 		    pagetable_mask);
1224 
1225 		error = stackshot_trap();
1226 
1227 		/* record the duration that interupts were disabled */
1228 		uint64_t time_end = mach_absolute_time();
1229 
1230 		/* Emit a SOCD tracepoint that we have completed the stackshot */
1231 		SOCD_TRACE_XNU_END(STACKSHOT);
1232 		ml_set_interrupts_enabled(prev_interrupt_state);
1233 
1234 #if CONFIG_EXCLAVES
1235 		/* trigger Exclave thread collection if any are queued */
1236 		assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1237 		if (stackshot_exclave_inspect_ctids) {
1238 			if (stackshot_exclave_inspect_ctid_count > 0) {
1239 				STACKSHOT_TESTPOINT(TP_START_COLLECTION);
1240 			}
1241 			error = collect_exclave_threads(flags);
1242 		}
1243 #endif /* CONFIG_EXCLAVES */
1244 
1245 		if (stackshot_duration_outer) {
1246 			*stackshot_duration_outer = time_end - time_start;
1247 		}
1248 		tot_interrupts_off_abs += time_end - time_start;
1249 
1250 		if (error != KERN_SUCCESS) {
1251 			if (kcdata_p != NULL) {
1252 				kcdata_memory_destroy(kcdata_p);
1253 				kcdata_p = NULL;
1254 				stackshot_kcdata_p = NULL;
1255 			}
1256 			kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
1257 			stackshotbuf = NULL;
1258 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
1259 				/*
1260 				 * If we didn't allocate a big enough buffer, deallocate and try again.
1261 				 */
1262 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
1263 				    time_end - time_start, stackshot_estimate, stackshotbuf_size);
1264 				stackshot_duration_prior_abs += (time_end - time_start);
1265 				continue;
1266 			} else {
1267 				goto error_exit;
1268 			}
1269 		}
1270 
1271 		kcd_exit_on_error(finalize_kcdata(stackshot_kcdata_p));
1272 
1273 		bytes_traced = kdp_stack_snapshot_bytes_traced();
1274 		if (bytes_traced <= 0) {
1275 			error = KERN_ABORTED;
1276 			goto error_exit;
1277 		}
1278 
1279 		assert(bytes_traced <= stackshotbuf_size);
1280 		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
1281 			error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
1282 			goto error_exit;
1283 		}
1284 
1285 		/*
1286 		 * Save the stackshot in the kernel buffer.
1287 		 */
1288 		kernel_stackshot_buf = stackshotbuf;
1289 		kernel_stackshot_buf_size =  bytes_traced;
1290 		/*
1291 		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
1292 		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
1293 		 * update size_to_free for kmem_free accordingly.
1294 		 */
1295 		size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
1296 
1297 		assert(size_to_free >= 0);
1298 
1299 		if (size_to_free != 0) {
1300 			buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
1301 		}
1302 
1303 		stackshotbuf = NULL;
1304 		stackshotbuf_size = 0;
1305 		goto error_exit;
1306 	}
1307 
1308 	if (stackshotbuf_size > max_tracebuf_size) {
1309 		error = KERN_RESOURCE_SHORTAGE;
1310 	}
1311 
1312 error_exit:
1313 	if (is_traced) {
1314 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
1315 		    error, tot_interrupts_off_abs, stackshotbuf_size, bytes_traced);
1316 	}
1317 
1318 error_early_exit:
1319 	if (kcdata_p != NULL) {
1320 		kcdata_memory_destroy(kcdata_p);
1321 		kcdata_p = NULL;
1322 		stackshot_kcdata_p = NULL;
1323 	}
1324 
1325 	if (stackshotbuf != NULL) {
1326 		kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
1327 	}
1328 	if (buf_to_free != NULL) {
1329 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
1330 	}
1331 
1332 	STACKSHOT_SUBSYS_UNLOCK();
1333 	STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
1334 
1335 	return error;
1336 }
1337 
1338 /*
1339  * Cache stack snapshot parameters in preparation for a trace.
1340  */
1341 void
kdp_snapshot_preflight(int pid,void * tracebuf,uint32_t tracebuf_size,uint64_t flags,kcdata_descriptor_t data_p,uint64_t since_timestamp,uint32_t pagetable_mask)1342 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
1343     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
1344 {
1345 	uint64_t microsecs = 0, secs = 0;
1346 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
1347 
1348 	stackshot_microsecs = microsecs + (secs * USEC_PER_SEC);
1349 	stack_snapshot_pid = pid;
1350 	stack_snapshot_buf = tracebuf;
1351 	stack_snapshot_bufsize = tracebuf_size;
1352 	stack_snapshot_flags = flags;
1353 	stack_snapshot_delta_since_timestamp = since_timestamp;
1354 	stack_snapshot_pagetable_mask = pagetable_mask;
1355 
1356 	panic_stackshot = ((flags & STACKSHOT_FROM_PANIC) != 0);
1357 
1358 	assert(data_p != NULL);
1359 	assert(stackshot_kcdata_p == NULL);
1360 	stackshot_kcdata_p = data_p;
1361 
1362 	stack_snapshot_bytes_traced = 0;
1363 	stack_snapshot_bytes_uncompressed = 0;
1364 }
1365 
1366 void
panic_stackshot_reset_state(void)1367 panic_stackshot_reset_state(void)
1368 {
1369 	stackshot_kcdata_p = NULL;
1370 }
1371 
1372 boolean_t
stackshot_active(void)1373 stackshot_active(void)
1374 {
1375 	return stackshot_kcdata_p != NULL;
1376 }
1377 
1378 uint32_t
kdp_stack_snapshot_bytes_traced(void)1379 kdp_stack_snapshot_bytes_traced(void)
1380 {
1381 	return stack_snapshot_bytes_traced;
1382 }
1383 
1384 uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)1385 kdp_stack_snapshot_bytes_uncompressed(void)
1386 {
1387 	return stack_snapshot_bytes_uncompressed;
1388 }
1389 
1390 static boolean_t
memory_iszero(void * addr,size_t size)1391 memory_iszero(void *addr, size_t size)
1392 {
1393 	char *data = (char *)addr;
1394 	for (size_t i = 0; i < size; i++) {
1395 		if (data[i] != 0) {
1396 			return FALSE;
1397 		}
1398 	}
1399 	return TRUE;
1400 }
1401 
1402 /*
1403  * Keep a simple cache of the most recent validation done at a page granularity
1404  * to avoid the expensive software KVA-to-phys translation in the VM.
1405  */
1406 
1407 struct _stackshot_validation_state {
1408 	vm_offset_t last_valid_page_kva;
1409 	size_t last_valid_size;
1410 } g_validation_state;
1411 
1412 static void
_stackshot_validation_reset(void)1413 _stackshot_validation_reset(void)
1414 {
1415 	g_validation_state.last_valid_page_kva = -1;
1416 	g_validation_state.last_valid_size = 0;
1417 }
1418 
1419 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)1420 _stackshot_validate_kva(vm_offset_t addr, size_t size)
1421 {
1422 	vm_offset_t page_addr = atop_kernel(addr);
1423 	if (g_validation_state.last_valid_page_kva == page_addr &&
1424 	    g_validation_state.last_valid_size <= size) {
1425 		return true;
1426 	}
1427 
1428 	if (ml_validate_nofault(addr, size)) {
1429 		g_validation_state.last_valid_page_kva = page_addr;
1430 		g_validation_state.last_valid_size = size;
1431 		return true;
1432 	}
1433 	return false;
1434 }
1435 
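/*
 * Illustrative sketch (hypothetical standalone C): the same single-entry,
 * page-granular memoization pattern used by _stackshot_validate_kva()
 * above, with a stub standing in for ml_validate_nofault() and an assumed
 * page shift; the hit test here is written in the conservative direction
 * (the cached validation must cover at least the requested size).
 */
#if 0
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_PAGE_SHIFT 14 /* assumption: 16KB pages */

extern bool slow_validate(uintptr_t addr, size_t size); /* stand-in */

static struct {
	uintptr_t last_page; /* page number of the last successful check */
	size_t    last_size; /* size validated on that page */
} cache = { (uintptr_t)-1, 0 };

static bool
validate_cached(uintptr_t addr, size_t size)
{
	uintptr_t page = addr >> EXAMPLE_PAGE_SHIFT;
	if (cache.last_page == page && size <= cache.last_size) {
		return true; /* request fits inside the cached validation */
	}
	if (slow_validate(addr, size)) {
		cache.last_page = page;
		cache.last_size = size;
		return true;
	}
	return false;
}
#endif
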
1436 static long
1437 _stackshot_strlen(const char *s, size_t maxlen)
1438 {
1439 	size_t len = 0;
1440 	for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
1441 		if (*s == 0) {
1442 			return len;
1443 		}
1444 		if (len >= maxlen) {
1445 			return -1;
1446 		}
1447 	}
1448 	return -1; /* failed before end of string */
1449 }
1450 
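/*
 * Illustrative usage sketch (hypothetical): callers treat a non-positive
 * return as "string unreadable", as kdp_stackshot_plh_record() does below
 * when it falls back to STACKSHOT_PORTLABEL_READFAILED.
 */
#if 0
long name_len = _stackshot_strlen(name, max_namelen);
if (name_len > 0) {
	/* safe to copy name_len + 1 bytes, including the NUL */
} else {
	/* unmapped, not NUL-terminated within max_namelen, or empty */
}
#endif
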
1451 /*
1452  * For port labels, we have a small hash table we use to track the
1453  * struct ipc_service_port_label pointers we see along the way.
1454  * This structure encapsulates the global state.
1455  *
1456  * The hash table is insert-only, similar to "intern"ing strings.  It's
1457  * only used and manipulated during the stackshot collection.  We use
1458  * separate chaining, with the hash elements and chains being int16_t
1459  * indexes into the parallel arrays, with -1 ending the chain.  Array indices are
1460  * allocated using a bump allocator.
1461  *
1462  * The parallel arrays contain:
1463  *      - plh_array[idx]	the pointer entered
1464  *      - plh_chains[idx]	the hash chain
1465  *      - plh_gen[idx]		the last 'generation #' seen
1466  *
1467  * Generation IDs are used to track entries looked up in the current
1468  * task; 0 is never used, and the plh_gen array is cleared to 0 on
1469  * rollover.
1470  *
1471  * The portlabel_ids we report externally are just the index in the array,
1472  * plus 1 to avoid 0 as a value.  0 is NONE, -1 is UNKNOWN (i.e. there is
1473  * one, but we ran out of space).
1474  */
1475 struct port_label_hash {
1476 	uint16_t                plh_size;       /* size of allocations; 0 disables tracking */
1477 	uint16_t                plh_count;      /* count of used entries in plh_array */
1478 	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
1479 	int16_t                *plh_chains;    /* _size allocated */
1480 	uint8_t                *plh_gen;       /* last 'gen #' seen for each entry */
1481 	int16_t                *plh_hash;      /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
1482 	int16_t                 plh_curgen_min; /* min idx seen for this gen */
1483 	int16_t                 plh_curgen_max; /* max idx seen for this gen */
1484 	uint8_t                 plh_curgen;     /* current gen */
1485 #if DEVELOPMENT || DEBUG
1486 	/* statistics */
1487 	uint32_t                plh_lookups;    /* # lookups or inserts */
1488 	uint32_t                plh_found;
1489 	uint32_t                plh_found_depth;
1490 	uint32_t                plh_insert;
1491 	uint32_t                plh_insert_depth;
1492 	uint32_t                plh_bad;
1493 	uint32_t                plh_bad_depth;
1494 	uint32_t                plh_lookup_send;
1495 	uint32_t                plh_lookup_receive;
1496 #define PLH_STAT_OP(...)    (void)(__VA_ARGS__)
1497 #else /* DEVELOPMENT || DEBUG */
1498 #define PLH_STAT_OP(...)    (void)(0)
1499 #endif /* DEVELOPMENT || DEBUG */
1500 } port_label_hash;
1501 
1502 #define STACKSHOT_PLH_SHIFT    7
1503 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
1504 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
1505 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
1506 
1507 static size_t
1508 stackshot_plh_est_size(void)
1509 {
1510 	struct port_label_hash *plh = &port_label_hash;
1511 	size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
1512 
1513 	if (size == 0) {
1514 		return 0;
1515 	}
1516 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
1517 	return SIZE_EST(size * sizeof(*plh->plh_array)) +
1518 	       SIZE_EST(size * sizeof(*plh->plh_chains)) +
1519 	       SIZE_EST(size * sizeof(*plh->plh_gen)) +
1520 	       SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
1521 #undef SIZE_EST
1522 }
1523 
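/*
 * Worked example (assuming LP64 pointers and the default
 * stackshot_port_label_size of 256, with kdp_ipc_have_splabel true so
 * STACKSHOT_PLH_SIZE_MAX is 1024):
 *   plh_array:  256 * 8 bytes = 2048
 *   plh_chains: 256 * 2 bytes =  512
 *   plh_gen:    256 * 1 byte  =  256
 *   plh_hash:   128 * 2 bytes =  256
 * (each term rounded up to sizeof(uintptr_t); all are already multiples
 * of 8 here), for an estimated 3072 bytes of end-allocated kcdata space.
 */
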
1524 static void
1525 stackshot_plh_reset(void)
1526 {
1527 	port_label_hash = (struct port_label_hash){.plh_size = 0};  /* structure assignment */
1528 }
1529 
1530 static void
1531 stackshot_plh_setup(kcdata_descriptor_t data)
1532 {
1533 	struct port_label_hash plh = {
1534 		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
1535 		.plh_count = 0,
1536 		.plh_curgen = 1,
1537 		.plh_curgen_min = STACKSHOT_PLH_SIZE_MAX,
1538 		.plh_curgen_max = 0,
1539 	};
1540 	stackshot_plh_reset();
1541 	size_t size = plh.plh_size;
1542 	if (size == 0) {
1543 		return;
1544 	}
1545 	plh.plh_array = kcdata_endalloc(data, size * sizeof(*plh.plh_array));
1546 	plh.plh_chains = kcdata_endalloc(data, size * sizeof(*plh.plh_chains));
1547 	plh.plh_gen = kcdata_endalloc(data, size * sizeof(*plh.plh_gen));
1548 	plh.plh_hash = kcdata_endalloc(data, (1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash));
1549 	if (plh.plh_array == NULL || plh.plh_chains == NULL || plh.plh_gen == NULL || plh.plh_hash == NULL) {
1550 		PLH_STAT_OP(port_label_hash.plh_bad++);
1551 		return;
1552 	}
1553 	for (int x = 0; x < size; x++) {
1554 		plh.plh_array[x] = NULL;
1555 		plh.plh_chains[x] = -1;
1556 		plh.plh_gen[x] = 0;
1557 	}
1558 	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
1559 		plh.plh_hash[x] = -1;
1560 	}
1561 	port_label_hash = plh;  /* structure assignment */
1562 }
1563 
1564 static int16_t
1565 stackshot_plh_hash(struct ipc_service_port_label *ispl)
1566 {
1567 	uintptr_t ptr = (uintptr_t)ispl;
1568 	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
1569 #define PLH_HASH_STEP(ptr, x) \
1570 	    ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
1571 	ptr ^= PLH_HASH_STEP(ptr, 16);
1572 	ptr ^= PLH_HASH_STEP(ptr, 8);
1573 	ptr ^= PLH_HASH_STEP(ptr, 4);
1574 	ptr ^= PLH_HASH_STEP(ptr, 2);
1575 	ptr ^= PLH_HASH_STEP(ptr, 1);
1576 #undef PLH_HASH_STEP
1577 	return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
1578 }
1579 
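/*
 * Illustrative sketch (hypothetical standalone C, assuming 64-bit
 * pointers): with STACKSHOT_PLH_SHIFT = 7, the XOR-fold above reduces to
 * shifts of 56, 28, 14, and 7 (the x = 16 step is skipped because
 * 16 * 7 = 112 bits exceeds the pointer width), then masks the result to
 * the low 7 bits, so every pointer bit influences the bucket index.
 */
#if 0
#include <stdint.h>

static uint16_t
fold_hash(uintptr_t ptr)
{
	/* same folding schedule as stackshot_plh_hash() for SHIFT = 7 */
	ptr ^= ptr >> 56;
	ptr ^= ptr >> 28;
	ptr ^= ptr >> 14;
	ptr ^= ptr >> 7;
	return (uint16_t)(ptr & 0x7f); /* (1 << 7) - 1 */
}
#endif
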
1580 enum stackshot_plh_lookup_type {
1581 	STACKSHOT_PLH_LOOKUP_UNKNOWN,
1582 	STACKSHOT_PLH_LOOKUP_SEND,
1583 	STACKSHOT_PLH_LOOKUP_RECEIVE,
1584 };
1585 
1586 static void
1587 stackshot_plh_resetgen(void)
1588 {
1589 	struct port_label_hash *plh = &port_label_hash;
1590 	if (plh->plh_curgen_min == STACKSHOT_PLH_SIZE_MAX && plh->plh_curgen_max == 0) {
1591 		return;  // no lookups, nothing using the current generation
1592 	}
1593 	plh->plh_curgen++;
1594 	plh->plh_curgen_min = STACKSHOT_PLH_SIZE_MAX;
1595 	plh->plh_curgen_max = 0;
1596 	if (plh->plh_curgen == 0) { // wrapped, zero the array and increment the generation
1597 		for (int x = 0; x < plh->plh_size; x++) {
1598 			plh->plh_gen[x] = 0;
1599 		}
1600 		plh->plh_curgen = 1;
1601 	}
1602 }
1603 
1604 static int16_t
1605 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
1606 {
1607 	struct port_label_hash *plh = &port_label_hash;
1608 	int depth;
1609 	int16_t cur;
1610 	if (ispl == NULL) {
1611 		return STACKSHOT_PORTLABELID_NONE;
1612 	}
1613 	switch (type) {
1614 	case STACKSHOT_PLH_LOOKUP_SEND:
1615 		PLH_STAT_OP(plh->plh_lookup_send++);
1616 		break;
1617 	case STACKSHOT_PLH_LOOKUP_RECEIVE:
1618 		PLH_STAT_OP(plh->plh_lookup_receive++);
1619 		break;
1620 	default:
1621 		break;
1622 	}
1623 	PLH_STAT_OP(plh->plh_lookups++);
1624 	if (plh->plh_size == 0) {
1625 		return STACKSHOT_PORTLABELID_MISSING;
1626 	}
1627 	int16_t hash = stackshot_plh_hash(ispl);
1628 	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
1629 	depth = 0;
1630 	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
1631 		/* cur must be in-range, and chain depth can never be above our # allocated */
1632 		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
1633 			PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
1634 			return STACKSHOT_PORTLABELID_MISSING;
1635 		}
1636 		assert(cur < plh->plh_count);
1637 		if (plh->plh_array[cur] == ispl) {
1638 			PLH_STAT_OP((plh->plh_found++), (plh->plh_found_depth += depth));
1639 			goto found;
1640 		}
1641 		depth++;
1642 	}
1643 	/* not found in hash table, so alloc and insert it */
1644 	if (cur != -1) {
1645 		PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
1646 		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
1647 	}
1648 	PLH_STAT_OP((plh->plh_insert++), (plh->plh_insert_depth += depth));
1649 	if (plh->plh_count >= plh->plh_size) {
1650 		return STACKSHOT_PORTLABELID_MISSING; /* no space */
1651 	}
1652 	cur = plh->plh_count;
1653 	plh->plh_count++;
1654 	plh->plh_array[cur] = ispl;
1655 	plh->plh_chains[cur] = plh->plh_hash[hash];
1656 	plh->plh_hash[hash] = cur;
1657 found:
1658 	plh->plh_gen[cur] = plh->plh_curgen;
1659 	if (plh->plh_curgen_min > cur) {
1660 		plh->plh_curgen_min = cur;
1661 	}
1662 	if (plh->plh_curgen_max < cur) {
1663 		plh->plh_curgen_max = cur;
1664 	}
1665 	return cur + 1;   /* offset to avoid 0 */
1666 }
1667 
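/*
 * Illustrative sketch (hypothetical standalone C): the same insert-only,
 * separately-chained layout as stackshot_plh_lookup() in miniature --
 * parallel arrays indexed by a bump allocator, chains terminated by -1,
 * and returned IDs offset by +1 so 0 can mean "none". The hash here is a
 * placeholder, not the kernel's fold.
 */
#if 0
#include <stdint.h>
#include <string.h>

#define NBUCKETS 8
#define NSLOTS   16

static const void *slot_key[NSLOTS];  /* plh_array analogue */
static int16_t     slot_next[NSLOTS]; /* plh_chains analogue */
static int16_t     bucket[NBUCKETS];  /* plh_hash analogue */
static int16_t     nused;             /* bump allocator, plh_count analogue */

static void
intern_init(void)
{
	memset(bucket, 0xff, sizeof(bucket)); /* int16_t -1 chain terminators */
	nused = 0;
}

static int16_t
intern(const void *key)
{
	int16_t h = (int16_t)(((uintptr_t)key >> 4) % NBUCKETS);
	for (int16_t cur = bucket[h]; cur >= 0; cur = slot_next[cur]) {
		if (slot_key[cur] == key) {
			return cur + 1; /* found: return 1-based ID */
		}
	}
	if (nused >= NSLOTS) {
		return -1; /* table full: analogue of PORTLABELID_MISSING */
	}
	int16_t cur = nused++;
	slot_key[cur] = key;
	slot_next[cur] = bucket[h]; /* push new entry at the chain head */
	bucket[h] = cur;
	return cur + 1; /* IDs offset by +1 so 0 can mean "none" */
}
#endif
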
1668 // record any PLH referenced since the last stackshot_plh_resetgen() call
1669 static kern_return_t
1670 kdp_stackshot_plh_record(void)
1671 {
1672 	kern_return_t error = KERN_SUCCESS;
1673 	struct port_label_hash *plh = &port_label_hash;
1674 	uint16_t count = plh->plh_count;
1675 	uint8_t curgen = plh->plh_curgen;
1676 	int16_t curgen_min = plh->plh_curgen_min;
1677 	int16_t curgen_max = plh->plh_curgen_max;
1678 	if (curgen_min <= curgen_max && curgen_max < count &&
1679 	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
1680 		struct ipc_service_port_label **arr = plh->plh_array;
1681 		size_t ispl_size, max_namelen;
1682 		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
1683 		for (int idx = curgen_min; idx <= curgen_max; idx++) {
1684 			struct ipc_service_port_label *ispl = arr[idx];
1685 			struct portlabel_info spl = {
1686 				.portlabel_id = (idx + 1),
1687 			};
1688 			const char *name = NULL;
1689 			long name_sz = 0;
1690 			if (plh->plh_gen[idx] != curgen) {
1691 				continue;
1692 			}
1693 			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
1694 				kdp_ipc_fill_splabel(ispl, &spl, &name);
1695 			}
1696 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
1697 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
1698 			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) {   /* validates the kva */
1699 				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
1700 			} else {
1701 				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
1702 			}
1703 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
1704 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
1705 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
1706 		}
1707 	}
1708 
1709 error_exit:
1710 	return error;
1711 }
1712 
1713 #if DEVELOPMENT || DEBUG
1714 static kern_return_t
1715 kdp_stackshot_plh_stats(void)
1716 {
1717 	kern_return_t error = KERN_SUCCESS;
1718 	struct port_label_hash *plh = &port_label_hash;
1719 
1720 #define PLH_STAT(x) do { if (plh->x != 0) { \
1721 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, plh->x, "stackshot_" #x)); \
1722 } } while (0)
1723 	PLH_STAT(plh_size);
1724 	PLH_STAT(plh_lookups);
1725 	PLH_STAT(plh_found);
1726 	PLH_STAT(plh_found_depth);
1727 	PLH_STAT(plh_insert);
1728 	PLH_STAT(plh_insert_depth);
1729 	PLH_STAT(plh_bad);
1730 	PLH_STAT(plh_bad_depth);
1731 	PLH_STAT(plh_lookup_send);
1732 	PLH_STAT(plh_lookup_receive);
1733 #undef PLH_STAT
1734 
1735 error_exit:
1736 	return error;
1737 }
1738 #endif /* DEVELOPMENT || DEBUG */
1739 
1740 static uint64_t
1741 kcdata_get_task_ss_flags(task_t task)
1742 {
1743 	uint64_t ss_flags = 0;
1744 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
1745 	void *bsd_info = get_bsdtask_info(task);
1746 
1747 	if (task_64bit_addr) {
1748 		ss_flags |= kUser64_p;
1749 	}
1750 	if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
1751 		ss_flags |= kTerminatedSnapshot;
1752 	}
1753 	if (task->pidsuspended) {
1754 		ss_flags |= kPidSuspended;
1755 	}
1756 	if (task->frozen) {
1757 		ss_flags |= kFrozen;
1758 	}
1759 	if (task->effective_policy.tep_darwinbg == 1) {
1760 		ss_flags |= kTaskDarwinBG;
1761 	}
1762 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
1763 		ss_flags |= kTaskIsForeground;
1764 	}
1765 	if (task->requested_policy.trp_boosted == 1) {
1766 		ss_flags |= kTaskIsBoosted;
1767 	}
1768 	if (task->effective_policy.tep_sup_active == 1) {
1769 		ss_flags |= kTaskIsSuppressed;
1770 	}
1771 #if CONFIG_MEMORYSTATUS
1772 
1773 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
1774 	memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
1775 	if (dirty) {
1776 		ss_flags |= kTaskIsDirty;
1777 	}
1778 	if (dirty_tracked) {
1779 		ss_flags |= kTaskIsDirtyTracked;
1780 	}
1781 	if (allow_idle_exit) {
1782 		ss_flags |= kTaskAllowIdleExit;
1783 	}
1784 
1785 #endif
1786 	if (task->effective_policy.tep_tal_engaged) {
1787 		ss_flags |= kTaskTALEngaged;
1788 	}
1789 
1790 	ss_flags |= (0x7 & workqueue_get_pwq_state_kdp(bsd_info)) << 17;
1791 
1792 #if IMPORTANCE_INHERITANCE
1793 	if (task->task_imp_base) {
1794 		if (task->task_imp_base->iit_donor) {
1795 			ss_flags |= kTaskIsImpDonor;
1796 		}
1797 		if (task->task_imp_base->iit_live_donor) {
1798 			ss_flags |= kTaskIsLiveImpDonor;
1799 		}
1800 	}
1801 #endif
1802 	return ss_flags;
1803 }
1804 
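/*
 * Illustrative sketch (hypothetical standalone C): a snapshot consumer
 * can unpack the three pthread workqueue state bits exactly as they were
 * packed above -- masked to 0x7 and shifted up by 17.
 */
#if 0
#include <stdint.h>

static inline uint32_t
snapshot_pwq_state(uint64_t ss_flags)
{
	return (uint32_t)((ss_flags >> 17) & 0x7);
}
#endif
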
1805 static kern_return_t
1806 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
1807 {
1808 	kern_return_t error = KERN_SUCCESS;
1809 
1810 	uint64_t shared_cache_slide = 0;
1811 	uint64_t shared_cache_first_mapping = 0;
1812 	uint32_t kdp_fault_results = 0;
1813 	uint32_t shared_cache_id = 0;
1814 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
1815 
1816 
1817 	assert(task_snap_ss_flags != NULL);
1818 
1819 	/* Get basic info about the shared region pointer, regardless of any failures */
1820 	if (task->shared_region == NULL) {
1821 		*task_snap_ss_flags |= kTaskSharedRegionNone;
1822 	} else if (task->shared_region == primary_system_shared_region) {
1823 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
1824 	} else {
1825 		*task_snap_ss_flags |= kTaskSharedRegionOther;
1826 	}
1827 
1828 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
1829 		struct vm_shared_region *sr = task->shared_region;
1830 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
1831 
1832 		shared_cache_id = sr->sr_id;
1833 	} else {
1834 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
1835 		goto error_exit;
1836 	}
1837 
1838 	/* We haven't copied in the shared region UUID yet as part of setup */
1839 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
1840 		goto error_exit;
1841 	}
1842 
1843 
1844 	/*
1845 	 * No refcounting here, but we are in debugger context, so that should be safe.
1846 	 */
1847 	shared_cache_slide = task->shared_region->sr_slide;
1848 
1849 	if (task->shared_region == primary_system_shared_region) {
1850 		/* skip adding shared cache info -- it's the same as the system level one */
1851 		goto error_exit;
1852 	}
1853 	/*
1854 	 * New-style shared cache reference: for non-primary shared regions,
1855 	 * just include the ID of the shared cache we're attached to.  Consumers
1856 	 * should use the following info from the task's ts_ss_flags as well:
1857 	 *
1858 	 * kTaskSharedRegionNone - task is not attached to a shared region
1859 	 * kTaskSharedRegionSystem - task is attached to the shared region
1860 	 *     with kSharedCacheSystemPrimary set in sharedCacheFlags.
1861 	 * kTaskSharedRegionOther - task is attached to the shared region with
1862 	 *     sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
1863 	 */
1864 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
1865 
1866 	/*
1867 	 * For backwards compatibility; this should eventually be removed.
1868 	 *
1869 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
1870 	 * naming of both the structure and fields for this use wasn't great.  The
1871 	 * dyld_shared_cache_loadinfo structure has better names, but the same
1872 	 * layout and content as the original.
1873 	 *
1874 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
1875 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
1876 	 * entries; here, it's the slid first mapping, and we leave it that way
1877 	 * for backwards compatibility.
1878 	 */
1879 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
1880 	kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
1881 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
1882 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
1883 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
1884 
1885 error_exit:
1886 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1887 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
1888 	}
1889 
1890 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1891 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1892 	}
1893 
1894 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1895 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1896 	}
1897 
1898 	return error;
1899 }
1900 
1901 static kern_return_t
1902 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
1903 {
1904 	bool save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
1905 	bool save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
1906 	bool save_compactinfo_p      = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
1907 	bool should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
1908 
1909 	kern_return_t error        = KERN_SUCCESS;
1910 	mach_vm_address_t out_addr = 0;
1911 
1912 	mach_vm_address_t dyld_compactinfo_addr = 0;
1913 	uint32_t dyld_compactinfo_size = 0;
1914 
1915 	uint32_t uuid_info_count         = 0;
1916 	mach_vm_address_t uuid_info_addr = 0;
1917 	uint64_t uuid_info_timestamp     = 0;
1918 	kdp_fault_result_flags_t kdp_fault_results = 0;
1919 
1920 
1921 	assert(task_snap_ss_flags != NULL);
1922 
1923 	int task_pid     = pid_from_task(task);
1924 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
1925 
1926 	if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
1927 		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
1928 		if (task_64bit_addr) {
1929 			struct user64_dyld_all_image_infos task_image_infos;
1930 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1931 			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1932 				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
1933 				uuid_info_addr = task_image_infos.uuidArray;
1934 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1935 					uuid_info_timestamp = task_image_infos.timestamp;
1936 				}
1937 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
1938 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
1939 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
1940 				}
1941 
1942 			}
1943 		} else {
1944 			struct user32_dyld_all_image_infos task_image_infos;
1945 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1946 			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1947 				uuid_info_count = task_image_infos.uuidArrayCount;
1948 				uuid_info_addr = task_image_infos.uuidArray;
1949 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1950 					uuid_info_timestamp = task_image_infos.timestamp;
1951 				}
1952 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
1953 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
1954 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
1955 				}
1956 			}
1957 		}
1958 
1959 		/*
1960 		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
1961 		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
1962 		 * for this task.
1963 		 */
1964 		if (!uuid_info_addr) {
1965 			uuid_info_count = 0;
1966 		}
1967 
1968 		if (!dyld_compactinfo_addr) {
1969 			dyld_compactinfo_size = 0;
1970 		}
1971 
1972 	}
1973 
1974 	if (have_pmap && task_pid == 0) {
1975 		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
1976 			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
1977 		} else {
1978 			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
1979 		}
1980 	}
1981 
1982 	if (save_compactinfo_p && task_pid > 0) {
1983 		if (dyld_compactinfo_size == 0) {
1984 			*task_snap_ss_flags |= kTaskDyldCompactInfoNone;
1985 		} else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
1986 			*task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
1987 		} else {
1988 			kdp_fault_result_flags_t ci_kdp_fault_results = 0;
1989 
1990 			/* Open a compression window to avoid overflowing the stack */
1991 			kcdata_compression_window_open(kcd);
1992 			kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
1993 			    dyld_compactinfo_size, &out_addr));
1994 
1995 			if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
1996 			    dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
1997 				bzero((void *)out_addr, dyld_compactinfo_size);
1998 			}
1999 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
2000 				*task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
2001 			}
2002 
2003 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
2004 				*task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
2005 			}
2006 
2007 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
2008 				*task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
2009 			}
2010 
2011 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
2012 		}
2013 	}
2014 	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
2015 		uint32_t copied_uuid_count = 0;
2016 		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
2017 		uint32_t uuid_info_array_size = 0;
2018 
2019 		/* Open a compression window to avoid overflowing the stack */
2020 		kcdata_compression_window_open(kcd);
2021 
2022 		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
2023 		if (uuid_info_count > 0) {
2024 			uuid_info_array_size = uuid_info_count * uuid_info_size;
2025 
2026 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
2027 			    uuid_info_size, uuid_info_count, &out_addr));
2028 
2029 			if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
2030 				bzero((void *)out_addr, uuid_info_array_size);
2031 			} else {
2032 				copied_uuid_count = uuid_info_count;
2033 			}
2034 		}
2035 
2036 		uuid_t binary_uuid;
2037 		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
2038 			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
2039 			if (uuid_info_array_size == 0) {
2040 				/* We just need to store one UUID */
2041 				uuid_info_array_size = uuid_info_size;
2042 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
2043 				    uuid_info_size, 1, &out_addr));
2044 			}
2045 
2046 			if (task_64bit_addr) {
2047 				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
2048 				uint64_t image_load_address = task->mach_header_vm_address;
2049 
2050 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
2051 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
2052 			} else {
2053 				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
2054 				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
2055 
2056 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
2057 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
2058 			}
2059 		}
2060 
2061 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
2062 	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
2063 		uintptr_t image_load_address;
2064 
2065 		do {
2066 #if defined(__arm64__)
2067 			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
2068 				struct dyld_uuid_info_64 kc_uuid = {0};
2069 				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
2070 				kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
2071 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
2072 				break;
2073 			}
2074 #endif /* defined(__arm64__) */
2075 
2076 			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
2077 				/* Kernel UUID not found or inaccessible */
2078 				break;
2079 			}
2080 
2081 			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
2082 			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
2083 				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
2084 #if  defined(__arm64__)
2085 				kc_format_t primary_kc_type = KCFormatUnknown;
2086 				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
2087 					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
2088 					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
2089 				}
2090 #endif
2091 			}
2092 
2093 			/*
2094 			 * The element count of the array can vary - avoid overflowing the
2095 			 * stack by opening a window.
2096 			 */
2097 			kcdata_compression_window_open(kcd);
2098 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
2099 			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
2100 			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
2101 
2102 			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
2103 #if defined(__arm64__)
2104 			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
2105 				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
2106 				extern vm_offset_t segTEXTEXECB;
2107 				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
2108 			}
2109 #endif
2110 			uuid_info_array[0].imageLoadAddress = image_load_address;
2111 			kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
2112 
2113 			if (save_kextloadinfo_p &&
2114 			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
2115 			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
2116 			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
2117 				uint32_t kexti;
2118 				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
2119 					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
2120 #if defined(__arm64__)
2121 					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
2122 						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
2123 						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
2124 					}
2125 #endif
2126 					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
2127 					kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
2128 				}
2129 			}
2130 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
2131 		} while (0);
2132 	}
2133 
2134 error_exit:
2135 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
2136 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
2137 	}
2138 
2139 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
2140 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
2141 	}
2142 
2143 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
2144 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
2145 	}
2146 
2147 	return error;
2148 }
2149 
2150 static kern_return_t
2151 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
2152 {
2153 	kern_return_t error = KERN_SUCCESS;
2154 	mach_vm_address_t out_addr = 0;
2155 
2156 	/* I/O Statistics if any counters are non zero */
2157 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
2158 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
2159 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
2160 		kcdata_compression_window_open(kcd);
2161 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
2162 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
2163 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
2164 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
2165 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
2166 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
2167 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
2168 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
2169 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
2170 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
2171 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
2172 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
2173 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
2174 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
2175 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
2176 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
2177 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
2178 		}
2179 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
2180 	}
2181 
2182 
2183 error_exit:
2184 	return error;
2185 }
2186 
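/*
 * Worked identity (from the arithmetic above): io_stat_info only stores
 * reads, paging, metadata, per-priority, and total counters; the
 * complements are derived at capture time, e.g.
 *   ss_disk_writes_count = total_io.count - disk_reads.count
 *   ss_non_paging_count  = total_io.count - paging.count
 *   ss_data_count        = total_io.count - metadata.count
 * so each derived pair always sums back to the totals observed when the
 * snapshot was taken.
 */
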
2187 #if CONFIG_PERVASIVE_CPI
2188 static kern_return_t
2189 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
2190 {
2191 	struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
2192 	struct recount_usage usage = { 0 };
2193 	struct recount_usage perf_only = { 0 };
2194 	recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
2195 	instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
2196 	instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
2197 	instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
2198 	instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);
2199 
2200 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
2201 }
2202 #endif /* CONFIG_PERVASIVE_CPI */
2203 
2204 static kern_return_t
2205 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
2206 {
2207 	struct stackshot_cpu_architecture cpu_architecture = {0};
2208 	int32_t cputype;
2209 	int32_t cpusubtype;
2210 
2211 	proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
2212 	cpu_architecture.cputype = cputype;
2213 	cpu_architecture.cpusubtype = cpusubtype;
2214 
2215 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
2216 }
2217 
2218 static kern_return_t
2219 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
2220 {
2221 	struct stackshot_task_codesigning_info codesigning_info = {};
2222 	void * bsdtask_info = NULL;
2223 	uint32_t trust = 0;
2224 	kern_return_t ret = 0;
2225 	pmap_t pmap = get_task_pmap(task);
2226 	if (task != kernel_task) {
2227 		bsdtask_info = get_bsdtask_info(task);
2228 		codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
2229 		ret = get_trust_level_kdp(pmap, &trust);
2230 		if (ret != KERN_SUCCESS) {
2231 			trust = KCDATA_INVALID_CS_TRUST_LEVEL;
2232 		}
2233 		codesigning_info.cs_trust_level = trust;
2234 	} else {
2235 		return KERN_SUCCESS;
2236 	}
2237 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
2238 }
2239 #if CONFIG_TASK_SUSPEND_STATS
2240 static kern_return_t
2241 kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
2242 {
2243 	kern_return_t ret = KERN_SUCCESS;
2244 	struct stackshot_suspension_info suspension_info = {};
2245 	task_suspend_stats_data_t suspend_stats;
2246 	task_suspend_source_array_t suspend_sources;
2247 	struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
2248 	int i;
2249 
2250 	if (task == kernel_task) {
2251 		return KERN_SUCCESS;
2252 	}
2253 
2254 	ret = task_get_suspend_stats_kdp(task, &suspend_stats);
2255 	if (ret != KERN_SUCCESS) {
2256 		return ret;
2257 	}
2258 
2259 	suspension_info.tss_count = suspend_stats.tss_count;
2260 	suspension_info.tss_duration = suspend_stats.tss_duration;
2261 	suspension_info.tss_last_end = suspend_stats.tss_last_end;
2262 	suspension_info.tss_last_start = suspend_stats.tss_last_start;
2263 	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
2264 	if (ret != KERN_SUCCESS) {
2265 		return ret;
2266 	}
2267 
2268 	ret = task_get_suspend_sources_kdp(task, suspend_sources);
2269 	if (ret != KERN_SUCCESS) {
2270 		return ret;
2271 	}
2272 
2273 	for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
2274 		suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
2275 		strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspend_sources[i].tss_procname));
2276 		suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
2277 		suspension_sources[i].tss_time = suspend_sources[i].tss_time;
2278 	}
2279 	return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
2280 }
2281 #endif /* CONFIG_TASK_SUSPEND_STATS */
2282 
2283 static kern_return_t
2284 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
2285 {
2286 	kern_return_t error                 = KERN_SUCCESS;
2287 	mach_vm_address_t out_addr          = 0;
2288 	struct transitioning_task_snapshot * cur_tsnap = NULL;
2289 
2290 	int task_pid           = pid_from_task(task);
2291 	/* Is returning -1 ok for a terminating task? */
2292 	uint64_t task_uniqueid = get_task_uniqueid(task);
2293 
2294 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
2295 		/*
2296 		 * if this task is a transit task from another one, show the pid as
2297 		 * negative
2298 		 */
2299 		task_pid = 0 - task_pid;
2300 	}
2301 
2302 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
2303 	kcdata_compression_window_open(kcd);
2304 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
2305 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
2306 	bzero(cur_tsnap, sizeof(*cur_tsnap));
2307 
2308 	cur_tsnap->tts_unique_pid = task_uniqueid;
2309 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
2310 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
2311 	cur_tsnap->tts_transition_type = transition_type;
2312 	cur_tsnap->tts_pid = task_pid;
2313 
2314 	/* Add the BSD process identifiers */
2315 	if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
2316 		proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
2317 	} else {
2318 		cur_tsnap->tts_p_comm[0] = '\0';
2319 	}
2320 
2321 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
2322 
2323 error_exit:
2324 	return error;
2325 }
2326 
2327 static kern_return_t
2328 #if STACKSHOT_COLLECTS_LATENCY_INFO
2329 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
2330 #else
2331 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
2332 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2333 {
2334 	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2335 	bool collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
2336 #if CONFIG_PERVASIVE_CPI
2337 	bool collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
2338 #endif /* CONFIG_PERVASIVE_CPI */
2339 #if __arm64__
2340 	bool collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
2341 #endif
2342 	bool collect_pagetables      = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
2343 
2344 
2345 	kern_return_t error                 = KERN_SUCCESS;
2346 	mach_vm_address_t out_addr          = 0;
2347 	struct task_snapshot_v2 * cur_tsnap = NULL;
2348 #if STACKSHOT_COLLECTS_LATENCY_INFO
2349 	latency_info->cur_tsnap_latency = mach_absolute_time();
2350 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2351 
2352 	int task_pid           = pid_from_task(task);
2353 	uint64_t task_uniqueid = get_task_uniqueid(task);
2354 	void *bsd_info = get_bsdtask_info(task);
2355 	uint64_t proc_starttime_secs = 0;
2356 
2357 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
2358 		/*
2359 		 * If this task is in transit from another one (exec in progress),
2360 		 * show the pid as negative.
2361 		 */
2362 		task_pid = 0 - task_pid;
2363 	}
2364 
2365 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
2366 	kcdata_compression_window_open(kcd);
2367 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
2368 	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
2369 	bzero(cur_tsnap, sizeof(*cur_tsnap));
2370 
2371 	cur_tsnap->ts_unique_pid = task_uniqueid;
2372 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
2373 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
2374 
2375 	struct recount_usage term_usage = { 0 };
2376 	recount_task_terminated_usage(task, &term_usage);
2377 	struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
2378 	cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
2379 	cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
2380 
2381 	proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
2382 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
2383 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
2384 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
2385 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
2386 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
2387 
2388 	cur_tsnap->ts_suspend_count = task->suspend_count;
2389 	cur_tsnap->ts_faults = counter_load(&task->faults);
2390 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
2391 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
2392 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
2393 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
2394 	cur_tsnap->ts_pid = task_pid;
2395 
2396 	/* Add the BSD process identifiers */
2397 	if (task_pid != -1 && bsd_info != NULL) {
2398 		proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
2399 	} else {
2400 		cur_tsnap->ts_p_comm[0] = '\0';
2401 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
2402 		if (task->task_imp_base != NULL) {
2403 			kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
2404 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
2405 		}
2406 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
2407 	}
2408 
2409 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
2410 
2411 #if CONFIG_COALITIONS
2412 	if (task_pid != -1 && bsd_info != NULL &&
2413 	    (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
2414 		/*
2415 		 * The jetsam coalition ID is always saved, even if
2416 		 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
2417 		 */
2418 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
2419 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
2420 	}
2421 #endif /* CONFIG_COALITIONS */
2422 
2423 #if __arm64__
2424 	if (collect_asid && have_pmap) {
2425 		uint32_t asid = PMAP_VASID(task->map->pmap);
2426 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
2427 	}
2428 #endif
2429 
2430 #if STACKSHOT_COLLECTS_LATENCY_INFO
2431 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
2432 	latency_info->pmap_latency = mach_absolute_time();
2433 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2434 
2435 	if (collect_pagetables && have_pmap) {
2436 #if SCHED_HYGIENE_DEBUG
2437 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
2438 		ml_spin_debug_clear_self();
2439 #endif
2440 		size_t bytes_dumped = 0;
2441 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stack_snapshot_pagetable_mask, &bytes_dumped);
2442 		if (error != KERN_SUCCESS) {
2443 			goto error_exit;
2444 		} else {
2445 			/* Variable size array - better not have it on the stack. */
2446 			kcdata_compression_window_open(kcd);
2447 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
2448 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
2449 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
2450 		}
2451 	}
2452 
2453 #if STACKSHOT_COLLECTS_LATENCY_INFO
2454 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
2455 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
2456 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2457 
2458 #if STACKSHOT_COLLECTS_LATENCY_INFO
2459 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
2460 	latency_info->end_latency = mach_absolute_time();
2461 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2462 
2463 	if (collect_iostats) {
2464 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
2465 	}
2466 
2467 #if CONFIG_PERVASIVE_CPI
2468 	if (collect_instrs_cycles) {
2469 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
2470 	}
2471 #endif /* CONFIG_PERVASIVE_CPI */
2472 
2473 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
2474 	kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
2475 
2476 #if CONFIG_TASK_SUSPEND_STATS
2477 	kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
2478 #endif /* CONFIG_TASK_SUSPEND_STATS */
2479 
2480 #if STACKSHOT_COLLECTS_LATENCY_INFO
2481 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
2482 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2483 
2484 error_exit:
2485 	return error;
2486 }
2487 
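/*
 * Illustrative sketch (hypothetical standalone C): the latency QoS fields
 * above pack a specified tier as (0xFF << 16) | tier, leaving
 * LATENCY_QOS_TIER_UNSPECIFIED (a constant from the kernel policy
 * headers) untouched; a consumer can invert that packing as follows.
 */
#if 0
#include <stdint.h>

#define QOS_TIER_SPECIFIED_MASK (0xFFu << 16)

static inline uint32_t
latency_qos_tier(uint32_t ts_latency_qos)
{
	if ((ts_latency_qos & QOS_TIER_SPECIFIED_MASK) == QOS_TIER_SPECIFIED_MASK) {
		return ts_latency_qos & 0xFFFFu; /* packed tier value */
	}
	return ts_latency_qos; /* LATENCY_QOS_TIER_UNSPECIFIED */
}
#endif
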
2488 static kern_return_t
2489 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
2490 {
2491 #if !CONFIG_PERVASIVE_CPI
2492 #pragma unused(trace_flags)
2493 #endif /* !CONFIG_PERVASIVE_CPI */
2494 	kern_return_t error                       = KERN_SUCCESS;
2495 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
2496 	mach_vm_address_t out_addr                = 0;
2497 	(void) trace_flags;
2498 #if __arm64__
2499 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
2500 #endif
2501 #if CONFIG_PERVASIVE_CPI
2502 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
2503 #endif /* CONFIG_PERVASIVE_CPI */
2504 
2505 	uint64_t task_uniqueid = get_task_uniqueid(task);
2506 
2507 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
2508 
2509 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
2510 
2511 	cur_tsnap->tds_unique_pid = task_uniqueid;
2512 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
2513 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
2514 
2515 	struct recount_usage usage = { 0 };
2516 	recount_task_terminated_usage(task, &usage);
2517 	struct recount_times_mach term_times = recount_usage_times_mach(&usage);
2518 
2519 	cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
2520 	cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
2521 
2522 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
2523 
2524 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
2525 	cur_tsnap->tds_suspend_count = task->suspend_count;
2526 	cur_tsnap->tds_faults            = counter_load(&task->faults);
2527 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
2528 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
2529 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
2530 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
2531 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
2532 	    ? LATENCY_QOS_TIER_UNSPECIFIED
2533 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
2534 
2535 #if __arm64__
2536 	if (collect_asid && have_pmap) {
2537 		uint32_t asid = PMAP_VASID(task->map->pmap);
2538 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
2539 		kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
2540 	}
2541 #endif
2542 
2543 #if CONFIG_PERVASIVE_CPI
2544 	if (collect_instrs_cycles) {
2545 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
2546 	}
2547 #endif /* CONFIG_PERVASIVE_CPI */
2548 
2549 error_exit:
2550 	return error;
2551 }
2552 
2553 static kern_return_t
2554 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
2555 {
2556 	kern_return_t error = KERN_SUCCESS;
2557 	mach_vm_address_t out_addr = 0;
2558 
2559 	/* I/O Statistics */
2560 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
2561 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
2562 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
2563 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
2564 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
2565 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
2566 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
2567 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
2568 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
2569 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
2570 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
2571 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
2572 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
2573 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
2574 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
2575 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
2576 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
2577 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
2578 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
2579 		}
2580 	}
2581 
2582 error_exit:
2583 	return error;
2584 }
2585 
2586 bool
2587 machine_trace_thread_validate_kva(vm_offset_t addr)
2588 {
2589 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
2590 }
2591 
2592 struct _stackshot_backtrace_context {
2593 	vm_map_t sbc_map;
2594 	vm_offset_t sbc_prev_page;
2595 	vm_offset_t sbc_prev_kva;
2596 	uint32_t sbc_flags;
2597 	bool sbc_allow_faulting;
2598 };
2599 
2600 static errno_t
2601 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
2602 {
2603 	struct _stackshot_backtrace_context *ctx = vctx;
2604 	size_t map_page_mask = 0;
2605 	size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
2606 	    &map_page_mask);
2607 	assert(size < map_page_size);
2608 	if (src & (size - 1)) {
2609 		// The source should be aligned to the size passed in, like a stack
2610 		// frame or word.
2611 		return EINVAL;
2612 	}
2613 
2614 	vm_offset_t src_page = src & ~map_page_mask;
2615 	vm_offset_t src_kva = 0;
2616 
2617 	if (src_page != ctx->sbc_prev_page) {
2618 		uint32_t res = 0;
2619 		uint32_t flags = 0;
2620 		vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
2621 		    ctx->sbc_allow_faulting, &res);
2622 
2623 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
2624 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
2625 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
2626 		ctx->sbc_flags |= flags;
2627 		if (src_pa == 0) {
2628 			return EFAULT;
2629 		}
2630 
2631 		src_kva = phystokv(src_pa);
2632 		ctx->sbc_prev_page = src_page;
2633 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
2634 	} else {
2635 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
2636 	}
2637 
2638 #if KASAN
2639 	/*
2640 	 * KASan does not monitor accesses to userspace pages. Therefore, it is
2641 	 * pointless to maintain a shadow map for them. Instead, they are all
2642 	 * mapped to a single, always valid shadow map page. This approach saves
2643 	 * a considerable amount of shadow map pages which are limited and
2644 	 * precious.
2645 	 */
2646 	kasan_notify_address_nopoison(src_kva, size);
2647 #endif
2648 	memcpy(dst, (const void *)src_kva, size);
2649 
2650 	return 0;
2651 }
2652 
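/*
 * Illustrative sketch (hypothetical standalone C): the alignment test
 * above, src & (size - 1), only works because backtrace reads are
 * power-of-two sized (a word or a stack frame). A quick demonstration
 * with an arbitrary example address:
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint64_t src = 0x7fff00010008;
	assert((src & (8 - 1)) == 0);       /* 8-byte aligned: accepted */
	assert(((src + 4) & (8 - 1)) != 0); /* misaligned: EINVAL path */
	return 0;
}
#endif
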
2653 static kern_return_t
2654 kcdata_record_thread_snapshot(
2655 	kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
2656 {
2657 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
2658 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
2659 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2660 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
2661 #if CONFIG_PERVASIVE_CPI
2662 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
2663 #endif /* CONFIG_PERVASIVE_CPI */
2664 	kern_return_t error        = KERN_SUCCESS;
2665 
2666 #if STACKSHOT_COLLECTS_LATENCY_INFO
2667 	struct stackshot_latency_thread latency_info;
2668 	latency_info.cur_thsnap1_latency = mach_absolute_time();
2669 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2670 
2671 	mach_vm_address_t out_addr = 0;
2672 	int saved_count            = 0;
2673 
2674 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
2675 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
2676 
2677 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
2678 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
2679 
2680 	/* Populate the thread snapshot header */
2681 	cur_thread_snap->ths_ss_flags = 0;
2682 	cur_thread_snap->ths_thread_id = thread_tid(thread);
2683 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
2684 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
2685 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
2686 
2687 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
2688 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
2689 	} else {
2690 		cur_thread_snap->ths_voucher_identifier = 0;
2691 	}
2692 
2693 #if STACKSHOT_COLLECTS_LATENCY_INFO
2694 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
2695 	latency_info.dispatch_serial_latency = mach_absolute_time();
2696 	latency_info.dispatch_label_latency = 0;
2697 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2698 
2699 	cur_thread_snap->ths_dqserialnum = 0;
2700 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
2701 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
2702 		if (dqkeyaddr != 0) {
2703 			uint64_t dqaddr = 0;
2704 			boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
2705 			if (copyin_ok && dqaddr != 0) {
2706 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
2707 				uint64_t dqserialnum = 0;
2708 				copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
2709 				if (copyin_ok) {
2710 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
2711 					cur_thread_snap->ths_dqserialnum = dqserialnum;
2712 				}
2713 
2714 #if STACKSHOT_COLLECTS_LATENCY_INFO
2715 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
2716 				latency_info.dispatch_label_latency = mach_absolute_time();
2717 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2718 
2719 				/* try copying in the queue label */
2720 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
2721 				if (label_offs) {
2722 					uint64_t dqlabeladdr = dqaddr + label_offs;
2723 					uint64_t actual_dqlabeladdr = 0;
2724 
2725 					copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
2726 					if (copyin_ok && actual_dqlabeladdr != 0) {
2727 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
2728 						int len;
2729 
2730 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
2731 						len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
2732 						if (len > 0) {
2733 							mach_vm_address_t label_addr = 0;
2734 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
2735 							kdp_strlcpy((char*)label_addr, &label_buf[0], len);
2736 						}
2737 					}
2738 				}
2739 #if STACKSHOT_COLLECTS_LATENCY_INFO
2740 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
2741 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2742 			}
2743 		}
2744 	}
2745 
2746 #if STACKSHOT_COLLECTS_LATENCY_INFO
2747 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
2748 		latency_info.dispatch_serial_latency = 0;
2749 	}
2750 	latency_info.cur_thsnap2_latency = mach_absolute_time();
2751 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2752 
2753 	struct recount_times_mach times = recount_thread_times(thread);
2754 	cur_thread_snap->ths_user_time = times.rtm_user;
2755 	cur_thread_snap->ths_sys_time = times.rtm_system;
2756 
2757 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
2758 		cur_thread_snap->ths_ss_flags |= kThreadMain;
2759 	}
2760 	if (thread->effective_policy.thep_darwinbg) {
2761 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
2762 	}
2763 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
2764 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
2765 	}
2766 	if (thread->suspend_count > 0) {
2767 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
2768 	}
2769 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
2770 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
2771 	}
2772 #if CONFIG_EXCLAVES
2773 	if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !panic_stackshot) {
2774 		/* save exclave thread for later collection */
2775 		if (stackshot_exclave_inspect_ctid_count < stackshot_exclave_inspect_ctid_capacity) {
2776 			/* certain threads, like the collector, must never be inspected */
2777 			if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
2778 				stackshot_exclave_inspect_ctids[stackshot_exclave_inspect_ctid_count] = thread_get_ctid(thread);
2779 				stackshot_exclave_inspect_ctid_count += 1;
2780 				if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
2781 					panic("stackshot: trying to inspect already-queued thread");
2782 				}
2783 			}
2784 		}
2785 	}
2786 #endif /* CONFIG_EXCLAVES */
2787 	if (thread_on_core) {
2788 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
2789 	}
2790 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
2791 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
2792 	}
2793 
2794 	/* make sure state flags defined in kcdata.h still match internal flags */
2795 	static_assert(SS_TH_WAIT == TH_WAIT);
2796 	static_assert(SS_TH_SUSP == TH_SUSP);
2797 	static_assert(SS_TH_RUN == TH_RUN);
2798 	static_assert(SS_TH_UNINT == TH_UNINT);
2799 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
2800 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
2801 	static_assert(SS_TH_IDLE == TH_IDLE);
2802 
2803 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
2804 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
2805 	cur_thread_snap->ths_state                   = thread->state;
2806 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
2807 	cur_thread_snap->ths_base_priority = thread->base_pri;
2808 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
2809 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
2810 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
2811 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
2812 	    thread->requested_policy.thrp_qos_workq_override);
2813 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
2814 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
2815 
2816 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
2817 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
2818 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
2819 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
2820 
2821 #if STACKSHOT_COLLECTS_LATENCY_INFO
2822 	latency_info.cur_thsnap2_latency = mach_absolute_time() - latency_info.cur_thsnap2_latency;
2823 	latency_info.thread_name_latency = mach_absolute_time();
2824 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2825 
2826 	/* if there is a thread name, add it to the buffer */
2827 	cur_thread_name[0] = '\0';
2828 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
2829 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
2830 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
2831 		kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
2832 	}
2833 
2834 #if STACKSHOT_COLLECTS_LATENCY_INFO
2835 	latency_info.thread_name_latency = mach_absolute_time() - latency_info.thread_name_latency;
2836 	latency_info.sur_times_latency = mach_absolute_time();
2837 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2838 
2839 	/* record system, user, and runnable times */
2840 	time_value_t runnable_time;
2841 	thread_read_times(thread, NULL, NULL, &runnable_time);
2842 	clock_sec_t user_sec = 0, system_sec = 0;
2843 	clock_usec_t user_usec = 0, system_usec = 0;
2844 	absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
2845 	absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
2846 
2847 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
2848 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
2849 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
2850 		.user_usec = user_sec * USEC_PER_SEC + user_usec,
2851 		.system_usec = system_sec * USEC_PER_SEC + system_usec,
2852 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
2853 	};
2854 
2855 #if STACKSHOT_COLLECTS_LATENCY_INFO
2856 	latency_info.sur_times_latency = mach_absolute_time() - latency_info.sur_times_latency;
2857 	latency_info.user_stack_latency = mach_absolute_time();
2858 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2859 
2860 	/* Trace user stack, if any */
2861 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
2862 		uint32_t user_ths_ss_flags = 0;
2863 
2864 		/*
2865 		 * This relies on the "end" address pointing to the start of the
2866 		 * next element's data and, in the case of arrays, the elements themselves.
2867 		 */
2868 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2869 		mach_vm_address_t max_addr = (mach_vm_address_t)kcd_max_address(kcd);
2870 		assert(out_addr <= max_addr);
2871 		size_t avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
2872 		size_t max_frames = MIN(avail_frames, MAX_FRAMES);
2873 		if (max_frames == 0) {
2874 			error = KERN_RESOURCE_SHORTAGE;
2875 			goto error_exit;
2876 		}
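		/*
		 * The user backtrace is written directly into the free tail of
		 * the kcdata buffer; the array is only claimed (via
		 * kcdata_get_memory_addr_for_array) once the real frame count
		 * is known, avoiding a worst-case-sized temporary buffer.
		 */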
2877 		struct _stackshot_backtrace_context ctx = {
2878 			.sbc_map = task->map,
2879 			.sbc_allow_faulting = stack_enable_faulting,
2880 			.sbc_prev_page = -1,
2881 			.sbc_prev_kva = -1,
2882 		};
2883 		struct backtrace_control ctl = {
2884 			.btc_user_thread = thread,
2885 			.btc_user_copy = _stackshot_backtrace_copy,
2886 			.btc_user_copy_context = &ctx,
2887 		};
2888 		struct backtrace_user_info info = BTUINFO_INIT;
2889 
2890 		saved_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
2891 		    &info);
2892 		if (saved_count > 0) {
2893 #if __LP64__
2894 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
2895 #else // __LP64__
2896 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
2897 #endif // !__LP64__
2898 			mach_vm_address_t out_addr_array;
2899 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
2900 			    STACKLR_WORDS, sizeof(uintptr_t), saved_count,
2901 			    &out_addr_array));
2902 			/*
2903 			 * Ensure the kcd_end_address (above) trick worked.
2904 			 */
2905 			assert(out_addr == out_addr_array);
2906 			if (info.btui_info & BTI_64_BIT) {
2907 				user_ths_ss_flags |= kUser64_p;
2908 			}
2909 			if ((info.btui_info & BTI_TRUNCATED) ||
2910 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
2911 				user_ths_ss_flags |= kThreadTruncatedBT;
2912 				user_ths_ss_flags |= kThreadTruncUserBT;
2913 			}
2914 			user_ths_ss_flags |= ctx.sbc_flags;
2915 			ctx.sbc_flags = 0;
2916 #if __LP64__
2917 			/* We only support async stacks on 64-bit kernels */
2918 			if (info.btui_async_frame_addr != 0) {
2919 				uint32_t async_start_offset = info.btui_async_start_index;
2920 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
2921 				    sizeof(async_start_offset), &async_start_offset));
2922 				out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2923 				assert(out_addr <= max_addr);
2924 
2925 				avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
2926 				max_frames = MIN(avail_frames, MAX_FRAMES);
2927 				if (max_frames == 0) {
2928 					error = KERN_RESOURCE_SHORTAGE;
2929 					goto error_exit;
2930 				}
2931 				ctl.btc_frame_addr = info.btui_async_frame_addr;
2932 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
2933 				info = BTUINFO_INIT;
2934 				unsigned int async_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
2935 				    &info);
2936 				if (async_count > 0) {
2937 					mach_vm_address_t async_out_addr;
2938 					kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
2939 					    STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64, sizeof(uintptr_t), async_count,
2940 					    &async_out_addr));
2941 					/*
2942 					 * Ensure the kcd_end_address (above) trick worked.
2943 					 */
2944 					assert(out_addr == async_out_addr);
2945 					if ((info.btui_info & BTI_TRUNCATED) ||
2946 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
2947 						user_ths_ss_flags |= kThreadTruncatedBT;
2948 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
2949 					}
2950 					user_ths_ss_flags |= ctx.sbc_flags;
2951 				}
2952 			}
2953 #endif /* __LP64__ */
2954 		}
2955 		if (user_ths_ss_flags != 0) {
2956 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
2957 		}
2958 	}
2959 
2960 #if STACKSHOT_COLLECTS_LATENCY_INFO
2961 	latency_info.user_stack_latency = mach_absolute_time() - latency_info.user_stack_latency;
2962 	latency_info.kernel_stack_latency = mach_absolute_time();
2963 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
2964 
2965 	/* Call through to the machine-specific trace routines.
2966 	 * Frames are added past the snapshot header.
2967 	 */
2968 	if (thread->kernel_stack != 0) {
2969 		uint32_t kern_ths_ss_flags = 0;
2970 		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
2971 #if defined(__LP64__)
2972 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
2973 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
2974 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2975 		saved_count = machine_trace_thread64(
2976 #else
2977 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
2978 		extern int machine_trace_thread(thread_t thread, char *tracepos,
2979 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
2980 		saved_count = machine_trace_thread(
2981 #endif
2982 			thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES,
2983 			&kern_ths_ss_flags);
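		/*
		 * machine_trace_thread{,64} returns a byte count, not a frame
		 * count, so saved_count is divided by frame_size when the
		 * frame array is claimed below.
		 */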
2984 		if (saved_count > 0) {
2985 			int frame_size = sizeof(uintptr_t);
2986 #if defined(__LP64__)
2987 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
2988 #endif
2989 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, stack_kcdata_type,
2990 			    frame_size, saved_count / frame_size, &out_addr));
2991 #if CONFIG_EXCLAVES
2992 			if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
2993 				struct thread_exclaves_info info = { 0 };
2994 
2995 				info.tei_flags = kExclaveRPCActive;
2996 				if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
2997 					info.tei_flags |= kExclaveSchedulerRequest;
2998 				}
2999 				if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
3000 					info.tei_flags |= kExclaveUpcallActive;
3001 				}
3002 				info.tei_scid = thread->th_exclaves_scheduling_context_id;
3003 				info.tei_thread_offset = exclaves_stack_offset((uintptr_t *)out_addr, saved_count / frame_size, false);
3004 
3005 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
3006 			}
3007 #endif /* CONFIG_EXCLAVES */
3008 		}
3009 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
3010 			kern_ths_ss_flags |= kThreadTruncKernBT;
3011 		}
3012 		if (kern_ths_ss_flags != 0) {
3013 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
3014 		}
3015 	}
3016 
3017 #if STACKSHOT_COLLECTS_LATENCY_INFO
3018 	latency_info.kernel_stack_latency = mach_absolute_time() - latency_info.kernel_stack_latency;
3019 	latency_info.misc_latency = mach_absolute_time();
3020 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3021 
3022 #if CONFIG_THREAD_GROUPS
3023 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
3024 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
3025 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
3026 		kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
3027 	}
3028 #endif /* CONFIG_THREAD_GROUPS */
3029 
3030 	if (collect_iostats) {
3031 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
3032 	}
3033 
3034 #if CONFIG_PERVASIVE_CPI
3035 	if (collect_instrs_cycles) {
3036 		struct recount_usage usage = { 0 };
3037 		recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
3038 		    &usage);
3039 
3040 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
3041 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
3042 		instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
3043 		instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
3044 	}
3045 #endif /* CONFIG_PERVASIVE_CPI */
3046 
3047 #if STACKSHOT_COLLECTS_LATENCY_INFO
3048 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
3049 	if (collect_latency_info) {
3050 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
3051 	}
3052 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3053 
3054 error_exit:
3055 	return error;
3056 }
3057 
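/*
 * Record the abbreviated, fixed-size delta snapshot for a thread that has
 * not run since the previous full stackshot: identity, flags, and scheduling
 * state only -- no name, backtrace, or I/O statistics.
 */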
3058 static int
3059 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
3060 {
3061 	cur_thread_snap->tds_thread_id = thread_tid(thread);
3062 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
3063 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
3064 	} else {
3065 		cur_thread_snap->tds_voucher_identifier = 0;
3066 	}
3067 
3068 	cur_thread_snap->tds_ss_flags = 0;
3069 	if (thread->effective_policy.thep_darwinbg) {
3070 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
3071 	}
3072 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
3073 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
3074 	}
3075 	if (thread->suspend_count > 0) {
3076 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
3077 	}
3078 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
3079 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
3080 	}
3081 	if (thread_on_core) {
3082 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
3083 	}
3084 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
3085 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
3086 	}
3087 
3088 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
3089 	cur_thread_snap->tds_state                   = thread->state;
3090 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
3091 	cur_thread_snap->tds_base_priority           = thread->base_pri;
3092 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
3093 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
3094 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
3095 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
3096 	    thread->requested_policy.thrp_qos_workq_override);
3097 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
3098 
3099 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
3100 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
3101 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
3102 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
3103 
3104 	return 0;
3105 }
3106 
3107 /*
3108  * Why 12?  12 strikes a decent balance between allocating a large array on
3109  * the stack and having large kcdata item overheads for recording nonrunnable
3110  * tasks.
3111  */
3112 #define UNIQUEIDSPERFLUSH 12
3113 
3114 struct saved_uniqueids {
3115 	uint64_t ids[UNIQUEIDSPERFLUSH];
3116 	unsigned count;
3117 };
3118 
3119 enum thread_classification {
3120 	tc_full_snapshot,  /* take a full snapshot */
3121 	tc_delta_snapshot, /* take a delta snapshot */
3122 };
3123 
3124 static enum thread_classification
3125 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
3126 {
3127 	processor_t last_processor = thread->last_processor;
3128 
3129 	boolean_t thread_on_core = FALSE;
3130 	if (last_processor != PROCESSOR_NULL) {
3131 		/* Idle threads are always treated as on-core, since the processor state can change while they are running. */
3132 		thread_on_core = (thread == last_processor->idle_thread) ||
3133 		    ((last_processor->state == PROCESSOR_SHUTDOWN || last_processor->state == PROCESSOR_RUNNING) &&
3134 		    last_processor->active_thread == thread);
3135 	}
3136 
3137 	*thread_on_core_p = thread_on_core;
3138 
3139 	/* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
3140 	 * previous full stackshot */
3141 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stack_snapshot_delta_since_timestamp)) {
3142 		return tc_full_snapshot;
3143 	} else {
3144 		return tc_delta_snapshot;
3145 	}
3146 }
3147 
3148 struct stackshot_context {
3149 	int pid;
3150 	uint64_t trace_flags;
3151 	bool include_drivers;
3152 };
3153 
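/*
 * Record a single task's container in the kcdata buffer: the task-level
 * (full, delta, or transitioning) snapshot, per-thread full or delta
 * snapshots, waitinfo/turnstile arrays, and importance-donation PIDs.
 */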
3154 static kern_return_t
3155 kdp_stackshot_record_task(struct stackshot_context *ctx, task_t task)
3156 {
3157 	boolean_t active_kthreads_only_p  = ((ctx->trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
3158 	boolean_t save_donating_pids_p    = ((ctx->trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
3159 	boolean_t collect_delta_stackshot = ((ctx->trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3160 	boolean_t save_owner_info         = ((ctx->trace_flags & STACKSHOT_THREAD_WAITINFO) != 0);
3161 
3162 	kern_return_t error = KERN_SUCCESS;
3163 	mach_vm_address_t out_addr = 0;
3164 	int saved_count = 0;
3165 
3166 	int task_pid                   = 0;
3167 	uint64_t task_uniqueid         = 0;
3168 	int num_delta_thread_snapshots = 0;
3169 	int num_waitinfo_threads       = 0;
3170 	int num_turnstileinfo_threads  = 0;
3171 
3172 	uint64_t task_start_abstime    = 0;
3173 	boolean_t have_map = FALSE, have_pmap = FALSE;
3174 	boolean_t some_thread_ran = FALSE;
3175 	unaligned_u64 task_snap_ss_flags = 0;
3176 #if STACKSHOT_COLLECTS_LATENCY_INFO
3177 	struct stackshot_latency_task latency_info;
3178 	latency_info.setup_latency = mach_absolute_time();
3179 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3180 
3181 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3182 	uint64_t task_begin_cpu_cycle_count = 0;
3183 	if (!panic_stackshot) {
3184 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3185 	}
3186 #endif
3187 
3188 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
3189 		error = KERN_FAILURE;
3190 		goto error_exit;
3191 	}
3192 
3193 	void *bsd_info = get_bsdtask_info(task);
3194 	boolean_t task_in_teardown        = (bsd_info == NULL) || proc_in_teardown(bsd_info); // has P_LPEXIT set during proc_exit()
3195 	boolean_t task_in_transition      = task_in_teardown;         // here we can add other types of transition.
3196 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
3197 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
3198 
3199 	if (task_in_transition) {
3200 		collect_delta_stackshot = FALSE;
3201 	}
3202 
3203 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
3204 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
3205 
3206 	task_pid = pid_from_task(task);
3207 	/* Is returning -1 ok for a terminating task? */
3208 	task_uniqueid = get_task_uniqueid(task);
3209 
3210 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
3211 		/*
3212 		 * Not interested in terminated tasks without threads.
3213 		 */
3214 		if (queue_empty(&task->threads) || task_pid == -1) {
3215 			return KERN_SUCCESS;
3216 		}
3217 	}
3218 
3219 	/* All PIDs should have the MSB unset */
3220 	assert((task_pid & (1ULL << 31)) == 0);
3221 
3222 #if STACKSHOT_COLLECTS_LATENCY_INFO
3223 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
3224 	latency_info.task_uniqueid = task_uniqueid;
3225 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3226 
3227 	/* Trace everything, unless a process was specified. Add in driver tasks if requested. */
3228 	if ((ctx->pid == -1) || (ctx->pid == task_pid) || (ctx->include_drivers && task_is_driver(task))) {
3229 		/* add task snapshot marker */
3230 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3231 		    container_type, task_uniqueid));
3232 
3233 		if (collect_delta_stackshot) {
3234 			/*
3235 			 * For delta stackshots we need to know if a thread from this task has run since the
3236 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
3237 			 */
3238 			thread_t thread = THREAD_NULL;
3239 			queue_iterate(&task->threads, thread, thread_t, task_threads)
3240 			{
3241 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
3242 					error = KERN_FAILURE;
3243 					goto error_exit;
3244 				}
3245 
3246 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
3247 					continue;
3248 				}
3249 
3250 				boolean_t thread_on_core;
3251 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
3252 
3253 				switch (thread_classification) {
3254 				case tc_full_snapshot:
3255 					some_thread_ran = TRUE;
3256 					break;
3257 				case tc_delta_snapshot:
3258 					num_delta_thread_snapshots++;
3259 					break;
3260 				}
3261 			}
3262 		}
3263 
3264 		if (collect_delta_stackshot) {
3265 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
3266 		}
3267 
3268 		/* Next record any relevant UUID info and store the task snapshot */
3269 		if (task_in_transition ||
3270 		    !collect_delta_stackshot ||
3271 		    (task_start_abstime == 0) ||
3272 		    (task_start_abstime > stack_snapshot_delta_since_timestamp) ||
3273 		    some_thread_ran) {
3274 			/*
3275 			 * Collect full task information in these scenarios:
3276 			 *
3277 			 * 1) a full stackshot or the task is in transition
3278 			 * 2) a delta stackshot where the task started after the previous full stackshot
3279 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
3280 			 *
3281 			 * because the task may have exec'ed, changing its name, architecture, load info, etc.
3282 			 */
3283 
3284 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
3285 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
3286 #if STACKSHOT_COLLECTS_LATENCY_INFO
3287 			if (!task_in_transition) {
3288 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags, &latency_info));
3289 			} else {
3290 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
3291 			}
3292 #else
3293 			if (!task_in_transition) {
3294 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
3295 			} else {
3296 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
3297 			}
3298 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3299 		} else {
3300 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
3301 		}
3302 
3303 #if STACKSHOT_COLLECTS_LATENCY_INFO
3304 		latency_info.misc_latency = mach_absolute_time();
3305 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3306 
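		/*
		 * kcdata arrays must be sized when they are claimed, so the
		 * counting pass above determined num_delta_thread_snapshots;
		 * reserve the whole delta-snapshot array here and fill it in
		 * during the recording pass below.
		 */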
3307 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
3308 		int current_delta_snapshot_index                  = 0;
3309 		if (num_delta_thread_snapshots > 0) {
3310 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
3311 			    sizeof(struct thread_delta_snapshot_v3),
3312 			    num_delta_thread_snapshots, &out_addr));
3313 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
3314 		}
3315 
3316 #if STACKSHOT_COLLECTS_LATENCY_INFO
3317 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
3318 #endif
3319 		/*
3320 		 * Iterate over the task threads to save thread snapshots and determine
3321 		 * how much space we need for waitinfo and turnstile info
3322 		 */
3323 		thread_t thread = THREAD_NULL;
3324 		queue_iterate(&task->threads, thread, thread_t, task_threads)
3325 		{
3326 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
3327 				error = KERN_FAILURE;
3328 				goto error_exit;
3329 			}
3330 
3331 			uint64_t thread_uniqueid;
3332 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
3333 				continue;
3334 			}
3335 			thread_uniqueid = thread_tid(thread);
3336 
3337 			boolean_t thread_on_core;
3338 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
3339 
3340 			switch (thread_classification) {
3341 			case tc_full_snapshot:
3342 				/* add thread marker */
3343 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3344 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
3345 
3346 				/* thread snapshots can be large, including strings; avoid overflowing the stack. */
3347 				kcdata_compression_window_open(stackshot_kcdata_p);
3348 
3349 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, ctx->trace_flags, have_pmap, thread_on_core));
3350 
3351 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3352 
3353 				/* mark end of thread snapshot data */
3354 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3355 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
3356 				break;
3357 			case tc_delta_snapshot:
3358 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
3359 				break;
3360 			}
3361 
3362 			/*
3363 			 * We want to report owner information regardless of whether a thread
3364 			 * has changed since the last delta, whether it's a normal stackshot,
3365 			 * or whether it's nonrunnable
3366 			 */
3367 			if (save_owner_info) {
3368 				if (stackshot_thread_has_valid_waitinfo(thread)) {
3369 					num_waitinfo_threads++;
3370 				}
3371 
3372 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
3373 					num_turnstileinfo_threads++;
3374 				}
3375 			}
3376 		}
3377 #if STACKSHOT_COLLECTS_LATENCY_INFO
3378 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
3379 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3380 
3381 
3382 		thread_waitinfo_v2_t *thread_waitinfo           = NULL;
3383 		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
3384 		int current_waitinfo_index              = 0;
3385 		int current_turnstileinfo_index         = 0;
3386 		/* allocate space for the wait and turnstile info */
3387 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
3388 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
3389 			kcdata_compression_window_open(stackshot_kcdata_p);
3390 
3391 			if (num_waitinfo_threads > 0) {
3392 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
3393 				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
3394 				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
3395 			}
3396 
3397 			if (num_turnstileinfo_threads > 0) {
3398 				/* get space for the turnstile info */
3399 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
3400 				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
3401 				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
3402 			}
3403 
3404 			stackshot_plh_resetgen();  // so we know which portlabel_ids are referenced
3405 		}
3406 
3407 #if STACKSHOT_COLLECTS_LATENCY_INFO
3408 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
3409 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
3410 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3411 
3412 		/* Iterate over the task's threads to save the wait and turnstile info */
3413 		queue_iterate(&task->threads, thread, thread_t, task_threads)
3414 		{
3415 			uint64_t thread_uniqueid;
3416 
3417 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
3418 				continue;
3419 			}
3420 
3421 			thread_uniqueid = thread_tid(thread);
3422 
3423 			/* If we want owner info, we should capture it regardless of its classification */
3424 			if (save_owner_info) {
3425 				if (stackshot_thread_has_valid_waitinfo(thread)) {
3426 					stackshot_thread_wait_owner_info(
3427 						thread,
3428 						&thread_waitinfo[current_waitinfo_index++]);
3429 				}
3430 
3431 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
3432 					stackshot_thread_turnstileinfo(
3433 						thread,
3434 						&thread_turnstileinfo[current_turnstileinfo_index++]);
3435 				}
3436 			}
3437 		}
3438 
3439 #if STACKSHOT_COLLECTS_LATENCY_INFO
3440 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
3441 		latency_info.misc2_latency = mach_absolute_time();
3442 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3443 
3444 #if DEBUG || DEVELOPMENT
3445 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
3446 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
3447 			    num_delta_thread_snapshots, current_delta_snapshot_index);
3448 		}
3449 		if (current_waitinfo_index != num_waitinfo_threads) {
3450 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
3451 			    num_waitinfo_threads, current_waitinfo_index);
3452 		}
3453 #endif
3454 
3455 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
3456 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3457 			// now, record the portlabel hashes.
3458 			kcd_exit_on_error(kdp_stackshot_plh_record());
3459 		}
3460 
3461 #if IMPORTANCE_INHERITANCE
3462 		if (save_donating_pids_p) {
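			/*
			 * task_importance_list_pids() writes directly past the current
			 * end of the kcdata buffer, so first verify that a worst-case
			 * TASK_IMP_WALK_LIMIT list of PIDs fits; the array is claimed
			 * afterwards once the actual count is known.
			 */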
3463 			kcd_exit_on_error(
3464 				((((mach_vm_address_t)kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t))) <
3465 				(mach_vm_address_t)kcd_max_address(stackshot_kcdata_p))
3466 				? KERN_SUCCESS
3467 				: KERN_RESOURCE_SHORTAGE));
3468 			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
3469 			    (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
3470 			if (saved_count > 0) {
3471 				/* Variable size array - better not have it on the stack. */
3472 				kcdata_compression_window_open(stackshot_kcdata_p);
3473 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
3474 				    sizeof(int32_t), saved_count, &out_addr));
3475 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3476 			}
3477 		}
3478 #endif
3479 
3480 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3481 		if (!panic_stackshot) {
3482 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
3483 			    "task_cpu_cycle_count"));
3484 		}
3485 #endif
3486 
3487 #if STACKSHOT_COLLECTS_LATENCY_INFO
3488 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
3489 		if (collect_latency_info) {
3490 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
3491 		}
3492 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3493 
3494 		/* mark end of task snapshot data */
3495 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
3496 		    task_uniqueid));
3497 	}
3498 
3499 
3500 error_exit:
3501 	return error;
3502 }
3503 
3504 /* Record global shared regions */
3505 static kern_return_t
3506 kdp_stackshot_shared_regions(uint64_t trace_flags)
3507 {
3508 	kern_return_t error        = KERN_SUCCESS;
3509 
3510 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3511 	extern queue_head_t vm_shared_region_queue;
3512 	vm_shared_region_t sr;
3513 
3515 	queue_iterate(&vm_shared_region_queue,
3516 	    sr,
3517 	    vm_shared_region_t,
3518 	    sr_q) {
3519 		struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
3520 		if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
3521 			break;
3522 		}
3523 		if (collect_delta_stackshot && sr->sr_install_time < stack_snapshot_delta_since_timestamp) {
3524 			continue; // only include new shared caches in delta stackshots
3525 		}
3526 		uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
3527 		    (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
3528 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3529 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
3530 		kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
3531 		scinfo.sharedCacheSlide = sr->sr_slide;
3532 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
3533 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
3534 		scinfo.sharedCacheID = sr->sr_id;
3535 		scinfo.sharedCacheFlags = sharedCacheFlags;
3536 
3537 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
3538 		    sizeof(scinfo), &scinfo));
3539 
3540 		if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
3541 		    _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
3542 			assert(sr->sr_images_count != 0);
3543 			kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
3544 		}
3545 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3546 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
3547 	}
3548 
3549 	/*
3550 	 * For backwards compatibility; this will eventually be removed.
3551 	 * Another copy of the Primary System Shared Region, for older readers.
3552 	 */
3553 	sr = primary_system_shared_region;
3554 	/* record system level shared cache load info (if available) */
3555 	if (!collect_delta_stackshot && sr &&
3556 	    _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
3557 		struct dyld_shared_cache_loadinfo scinfo = {0};
3558 
3559 		/*
3560 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3561 		 * naming of both the structure and fields for this use isn't great.  The
3562 		 * dyld_shared_cache_loadinfo structure has better names, but the same
3563 		 * layout and content as the original.
3564 		 *
3565 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3566 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3567 		 * entries; here, it's the slid base address, and we leave it that way
3568 		 * for backwards compatibility.
3569 		 */
3570 		kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
3571 		scinfo.sharedCacheSlide = sr->sr_slide;
3572 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
3573 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
3574 
3575 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
3576 		    sizeof(scinfo), &scinfo));
3577 
3578 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
3579 			/*
3580 			 * Include a map of the system shared cache layout if it has been populated
3581 			 * (which is only when the system is using a custom shared cache).
3582 			 */
3583 			if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
3584 			    (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
3585 				assert(sr->sr_images_count != 0);
3586 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
3587 			}
3588 		}
3589 	}
3590 
3591 error_exit:
3592 	return error;
3593 }
3594 
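/*
 * Main kcdata-format stackshot routine: emits the global header fields and
 * shared-region info, snapshots thread groups and jetsam coalitions, then
 * walks the live and terminated task lists, recording each task via
 * kdp_stackshot_record_task().
 */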
3595 static kern_return_t
3596 kdp_stackshot_kcdata_format(int pid, uint64_t * trace_flags_p)
3597 {
3598 	kern_return_t error        = KERN_SUCCESS;
3599 	mach_vm_address_t out_addr = 0;
3600 	uint64_t abs_time = 0, abs_time_end = 0;
3601 	uint64_t system_state_flags = 0;
3602 	task_t task = TASK_NULL;
3603 	mach_timebase_info_data_t timebase = {0, 0};
3604 	uint32_t length_to_copy = 0, tmp32 = 0;
3605 	abs_time = mach_absolute_time();
3606 	uint64_t last_task_start_time = 0;
3607 	uint64_t trace_flags = 0;
3608 
3609 	if (!trace_flags_p) {
3610 		panic("Invalid kdp_stackshot_kcdata_format trace_flags_p value");
3611 	}
3612 	trace_flags = *trace_flags_p;
3613 
3614 #if STACKSHOT_COLLECTS_LATENCY_INFO
3615 	struct stackshot_latency_collection latency_info;
3616 #endif
3617 
3618 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3619 	uint64_t stackshot_begin_cpu_cycle_count = 0;
3620 
3621 	if (!panic_stackshot) {
3622 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3623 	}
3624 #endif
3625 
3626 #if STACKSHOT_COLLECTS_LATENCY_INFO
3627 	collect_latency_info = (trace_flags & STACKSHOT_DISABLE_LATENCY_INFO) == 0;
3628 #endif
3629 	/* process the flags */
3630 	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3631 	bool use_fault_path          = ((trace_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
3632 	bool collect_exclaves        = !disable_exclave_stackshot && ((trace_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
3633 	stack_enable_faulting        = (trace_flags & (STACKSHOT_ENABLE_BT_FAULTING));
3634 
3635 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
3636 	kc_format_t primary_kc_type = KCFormatUnknown;
3637 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
3638 		trace_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
3639 	}
3640 
3641 	struct stackshot_context ctx = {};
3642 	ctx.trace_flags = trace_flags;
3643 	ctx.pid = pid;
3644 	ctx.include_drivers = (pid == 0 && (trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
3645 
3646 	if (use_fault_path) {
3647 		fault_stats.sfs_pages_faulted_in = 0;
3648 		fault_stats.sfs_time_spent_faulting = 0;
3649 		fault_stats.sfs_stopped_faulting = (uint8_t) FALSE;
3650 	}
3651 
3652 	if (sizeof(void *) == 8) {
3653 		system_state_flags |= kKernel64_p;
3654 	}
3655 
3656 	if (stackshot_kcdata_p == NULL) {
3657 		error = KERN_INVALID_ARGUMENT;
3658 		goto error_exit;
3659 	}
3660 
3661 	_stackshot_validation_reset();
3662 #if CONFIG_EXCLAVES
3663 	if (!panic_stackshot && collect_exclaves) {
3664 		kcd_exit_on_error(stackshot_setup_exclave_waitlist(stackshot_kcdata_p)); /* Allocate list of exclave threads */
3665 	}
3666 #else /* CONFIG_EXCLAVES */
3667 #pragma unused(collect_exclaves)
3668 #endif /* CONFIG_EXCLAVES */
3669 	stackshot_plh_setup(stackshot_kcdata_p); /* set up port label hash */
3670 
3671 
3672 	/* set up mach_absolute_time and timebase info -- copied out in some cases, and needed to convert since_timestamp to seconds for proc start times */
3673 	clock_timebase_info(&timebase);
3674 
3675 	/* begin saving data into the buffer */
3676 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
3677 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
3678 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
3679 	if (trace_flags & STACKSHOT_PAGE_TABLES) {
3680 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stack_snapshot_pagetable_mask, "stackshot_pagetable_mask"));
3681 	}
3682 	if (stackshot_initial_estimate != 0) {
3683 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
3684 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
3685 	}
3686 
3687 #if STACKSHOT_COLLECTS_LATENCY_INFO
3688 	latency_info.setup_latency = mach_absolute_time();
3689 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3690 
3691 #if CONFIG_JETSAM
3692 	tmp32 = memorystatus_get_pressure_status_kdp();
3693 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
3694 #endif
3695 
3696 	if (!collect_delta_stackshot) {
3697 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
3698 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
3699 
3700 		tmp32 = PAGE_SIZE;
3701 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
3702 
3703 		/* save boot-args and osversion string */
3704 		length_to_copy = MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
3705 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
3706 
3707 
3708 		length_to_copy = MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
3709 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
3710 
3711 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
3712 	} else {
3713 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stack_snapshot_delta_since_timestamp));
3714 	}
3715 
3716 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
3717 
3718 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_microsecs));
3719 
3720 	kcd_exit_on_error(kdp_stackshot_shared_regions(trace_flags));
3721 
3722 	/* Add requested information first */
3723 	if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
3724 		struct mem_and_io_snapshot mais = {0};
3725 		kdp_mem_and_io_snapshot(&mais);
3726 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
3727 	}
3728 
3729 #if CONFIG_THREAD_GROUPS
3730 	struct thread_group_snapshot_v3 *thread_groups = NULL;
3731 	int num_thread_groups = 0;
3732 
3733 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3734 	uint64_t thread_group_begin_cpu_cycle_count = 0;
3735 
3736 	if (!panic_stackshot && (trace_flags & STACKSHOT_THREAD_GROUP)) {
3737 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3738 	}
3739 #endif
3740 
3741 	/* Iterate over thread group names */
3742 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
3743 		/* Variable size array - better not have it on the stack. */
3744 		kcdata_compression_window_open(stackshot_kcdata_p);
3745 
3746 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
3747 			trace_flags &= ~(STACKSHOT_THREAD_GROUP);
3748 		}
3749 
3750 		if (num_thread_groups > 0) {
3751 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
3752 			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
3753 		}
3754 
3755 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
3756 			error = KERN_FAILURE;
3757 			goto error_exit;
3758 		}
3759 
3760 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3761 	}
3762 
3763 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3764 	if (!panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
3765 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
3766 		    "thread_groups_cpu_cycle_count"));
3767 	}
3768 #endif
3769 #else
3770 	trace_flags &= ~(STACKSHOT_THREAD_GROUP);
3771 #endif /* CONFIG_THREAD_GROUPS */
3772 
3773 
3774 #if STACKSHOT_COLLECTS_LATENCY_INFO
3775 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
3776 	latency_info.total_task_iteration_latency = mach_absolute_time();
3777 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3778 
3779 	bool const process_scoped = (ctx.pid != -1) && !ctx.include_drivers;
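	/*
	 * A process-scoped stackshot can stop after the first matching task;
	 * a kernel stackshot that includes driver threads must walk the whole
	 * task list.
	 */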
3780 
3781 	/* Iterate over tasks */
3782 	queue_iterate(&tasks, task, task_t, tasks)
3783 	{
3784 		if (collect_delta_stackshot) {
3785 			uint64_t abstime;
3786 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
3787 
3788 			if (abstime > last_task_start_time) {
3789 				last_task_start_time = abstime;
3790 			}
3791 		}
3792 
3793 		if (process_scoped && (pid_from_task(task) != ctx.pid)) {
3794 			continue;
3795 		}
3796 
3797 		error = kdp_stackshot_record_task(&ctx, task);
3798 		if (error) {
3799 			goto error_exit;
3800 		} else if (process_scoped) {
3801 			/* Only targeting one process, we're done now. */
3802 			break;
3803 		}
3804 	}
3805 
3806 
3807 #if STACKSHOT_COLLECTS_LATENCY_INFO
3808 	latency_info.total_task_iteration_latency = mach_absolute_time() - latency_info.total_task_iteration_latency;
3809 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3810 
3811 #if CONFIG_COALITIONS
3812 	/* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
3813 	if (!collect_delta_stackshot || (last_task_start_time > stack_snapshot_delta_since_timestamp)) {
3814 		int num_coalitions = 0;
3815 		struct jetsam_coalition_snapshot *coalitions = NULL;
3816 
3817 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3818 		uint64_t coalition_begin_cpu_cycle_count = 0;
3819 
3820 		if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
3821 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
3822 		}
3823 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
3824 
3825 		/* Iterate over coalitions */
3826 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
3827 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
3828 				trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
3829 			}
3830 		}
3831 		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
3832 			if (num_coalitions > 0) {
3833 				/* Variable size array - better not have it on the stack. */
3834 				kcdata_compression_window_open(stackshot_kcdata_p);
3835 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
3836 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
3837 
3838 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
3839 					error = KERN_FAILURE;
3840 					goto error_exit;
3841 				}
3842 
3843 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
3844 			}
3845 		}
3846 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3847 		if (!panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
3848 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
3849 			    "coalitions_cpu_cycle_count"));
3850 		}
3851 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
3852 	}
3853 #else
3854 	trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
3855 #endif /* CONFIG_COALITIONS */
3856 
3857 #if STACKSHOT_COLLECTS_LATENCY_INFO
3858 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time();
3859 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3860 
3861 	/*
3862 	 * Iterate over the tasks in the terminated tasks list. We only inspect
3863 	 * tasks that have a valid bsd_info pointer. Tasks in transition (e.g.
3864 	 * past P_LPEXIT during proc_exit()) are detected inside
3865 	 * kdp_stackshot_record_task(), which then collects a safer, minimal
3866 	 * transitioning_task_snapshot struct via
3867 	 * kcdata_record_transitioning_task_snapshot().
3868 	 */
3869 	queue_iterate(&terminated_tasks, task, task_t, tasks)
3870 	{
3871 		error = kdp_stackshot_record_task(&ctx, task);
3872 		if (error) {
3873 			goto error_exit;
3874 		}
3875 	}
3876 #if DEVELOPMENT || DEBUG
3877 	kcd_exit_on_error(kdp_stackshot_plh_stats());
3878 #endif /* DEVELOPMENT || DEBUG */
3879 
3880 #if STACKSHOT_COLLECTS_LATENCY_INFO
3881 	latency_info.total_terminated_task_iteration_latency = mach_absolute_time() - latency_info.total_terminated_task_iteration_latency;
3882 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3883 
3884 	if (use_fault_path) {
3885 		kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
3886 		    sizeof(struct stackshot_fault_stats), &fault_stats);
3887 	}
3888 
3889 #if STACKSHOT_COLLECTS_LATENCY_INFO
3890 	if (collect_latency_info) {
3891 		latency_info.latency_version = 1;
3892 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO, sizeof(latency_info), &latency_info));
3893 	}
3894 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3895 
3896 	/* update timestamp of the stackshot */
3897 	abs_time_end = mach_absolute_time();
3898 	struct stackshot_duration_v2 stackshot_duration = {
3899 		.stackshot_duration         = (abs_time_end - abs_time),
3900 		.stackshot_duration_outer   = 0,
3901 		.stackshot_duration_prior   = stackshot_duration_prior_abs,
3902 	};
3903 
3904 	if ((trace_flags & STACKSHOT_DO_COMPRESS) == 0) {
3905 		kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
3906 		    sizeof(struct stackshot_duration_v2), &out_addr));
3907 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
3908 		kdp_memcpy(duration_p, &stackshot_duration, sizeof(*duration_p));
3909 		stackshot_duration_outer                   = (unaligned_u64 *)&duration_p->stackshot_duration_outer;
3910 	} else {
3911 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_duration), &stackshot_duration));
3912 		stackshot_duration_outer = NULL;
3913 	}
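	/*
	 * In the uncompressed case, stackshot_duration_outer points back into
	 * the buffer so the caller can patch in the outer (end-to-end) duration
	 * after the stackshot returns; with compression the bytes have already
	 * been compressed, so no later fixup is possible.
	 */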
3914 
3915 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
3916 	if (!panic_stackshot) {
3917 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
3918 		    "stackshot_total_cpu_cycle_cnt"));
3919 	}
3920 #endif
3921 
3922 #if CONFIG_EXCLAVES
3923 	/* Avoid setting AST until as late as possible, in case the stackshot fails */
3924 	commit_exclaves_ast();
3925 
3926 	/* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
3927 	if (panic_stackshot) {
3928 		struct exclaves_panic_stackshot excl_ss;
3929 		kdp_read_panic_exclaves_stackshot(&excl_ss);
3930 
3931 		if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
3932 			tb_error_t tberr = TB_ERROR_SUCCESS;
3933 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
3934 
3935 			/* this block does not escape, so capturing a pointer to the local error is okay */
3936 			kern_return_t *error_in_block = &error;
3937 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3938 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
3939 			tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
3940 				*error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, true);
3941 			});
3942 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3943 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
3944 			if (tberr != TB_ERROR_SUCCESS) {
3945 				exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
3946 			}
3947 		} else {
3948 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
3949 		}
3950 
3951 		/* check error from the block */
3952 		kcd_exit_on_error(error);
3953 	}
3954 #endif
3955 
3956 	*trace_flags_p = trace_flags;
3957 
3958 error_exit:;
3959 
3960 #if CONFIG_EXCLAVES
3961 	if (error != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
3962 		/* Clear inspection CTID list: no need to wait for these threads */
3963 		stackshot_exclave_inspect_ctid_count = 0;
3964 		stackshot_exclave_inspect_ctid_capacity = 0;
3965 		stackshot_exclave_inspect_ctids = NULL;
3966 	}
3967 #endif
3968 
3969 #if SCHED_HYGIENE_DEBUG
3970 	bool disable_interrupts_masked_check = kern_feature_override(
3971 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
3972 	    (trace_flags & STACKSHOT_DO_COMPRESS) != 0;
3973 
3974 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
3975 	disable_interrupts_masked_check = true;
3976 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
3977 
3978 	if (disable_interrupts_masked_check) {
3979 		ml_spin_debug_clear_self();
3980 	}
3981 
3982 	if (!panic_stackshot && interrupt_masked_debug_mode) {
3983 		/*
3984 		 * Try to catch instances where stackshot takes too long BEFORE returning from
3985 		 * the debugger
3986 		 */
3987 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
3988 	}
3989 #endif /* SCHED_HYGIENE_DEBUG */
3990 	stackshot_plh_reset();
3991 	stack_enable_faulting = FALSE;
3992 
3993 	return error;
3994 }
3995 
3996 static uint64_t
3997 proc_was_throttled_from_task(task_t task)
3998 {
3999 	uint64_t was_throttled = 0;
4000 	void *bsd_info = get_bsdtask_info(task);
4001 
4002 	if (bsd_info) {
4003 		was_throttled = proc_was_throttled(bsd_info);
4004 	}
4005 
4006 	return was_throttled;
4007 }
4008 
4009 static uint64_t
4010 proc_did_throttle_from_task(task_t task)
4011 {
4012 	uint64_t did_throttle = 0;
4013 	void *bsd_info = get_bsdtask_info(task);
4014 
4015 	if (bsd_info) {
4016 		did_throttle = proc_did_throttle(bsd_info);
4017 	}
4018 
4019 	return did_throttle;
4020 }
4021 
4022 static void
4023 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
4024 {
4025 	unsigned int pages_reclaimed;
4026 	unsigned int pages_wanted;
4027 	kern_return_t kErr;
4028 
4029 	uint64_t compressions = 0;
4030 	uint64_t decompressions = 0;
4031 
4032 	compressions = counter_load(&vm_statistics_compressions);
4033 	decompressions = counter_load(&vm_statistics_decompressions);
4034 
4035 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
4036 	memio_snap->free_pages = vm_page_free_count;
4037 	memio_snap->active_pages = vm_page_active_count;
4038 	memio_snap->inactive_pages = vm_page_inactive_count;
4039 	memio_snap->purgeable_pages = vm_page_purgeable_count;
4040 	memio_snap->wired_pages = vm_page_wire_count;
4041 	memio_snap->speculative_pages = vm_page_speculative_count;
4042 	memio_snap->throttled_pages = vm_page_throttled_count;
4043 	memio_snap->busy_buffer_count = count_busy_buffers();
4044 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
4045 	memio_snap->compressions = (uint32_t)compressions;
4046 	memio_snap->decompressions = (uint32_t)decompressions;
4047 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
4048 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
4049 
4050 	if (!kErr) {
4051 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
4052 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
4053 		memio_snap->pages_wanted_reclaimed_valid = 1;
4054 	} else {
4055 		memio_snap->pages_wanted = 0;
4056 		memio_snap->pages_reclaimed = 0;
4057 		memio_snap->pages_wanted_reclaimed_valid = 0;
4058 	}
4059 }
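
/*
 * A minimal sketch (not part of XNU) of how a consumer of the
 * mem_and_io_snapshot data recorded above might convert the page counts
 * back into bytes.  The struct subset and the page size shown here are
 * assumptions for illustration; a real consumer would obtain the page
 * size from the host being inspected.
 */
#if 0 /* illustration only */
#include <stdint.h>
#include <stdio.h>

struct mem_snapshot_view {        /* subset of struct mem_and_io_snapshot */
	uint32_t free_pages;
	uint32_t wired_pages;
	uint32_t compressor_size; /* pages held by the VM compressor */
};

static void
print_mem_summary(const struct mem_snapshot_view *snap, uint64_t page_size)
{
	printf("free:       %llu bytes\n",
	    (unsigned long long)snap->free_pages * page_size);
	printf("wired:      %llu bytes\n",
	    (unsigned long long)snap->wired_pages * page_size);
	printf("compressor: %llu bytes\n",
	    (unsigned long long)snap->compressor_size * page_size);
}
#endif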
4060 
4061 static vm_offset_t
4062 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
4063 {
4064 	vm_offset_t result;
4065 	struct kdp_fault_result fault_results = {0};
4066 	if (fault_stats.sfs_stopped_faulting) {
4067 		fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
4068 	}
4069 
4070 	result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
4071 
4072 	if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
4073 		fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
4074 
4075 		if ((fault_stats.sfs_time_spent_faulting >= fault_stats.sfs_system_max_fault_time) && !panic_stackshot) {
4076 			fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
4077 		}
4078 	}
4079 
4080 	if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
4081 		fault_stats.sfs_pages_faulted_in++;
4082 	}
4083 
4084 	if (kdp_fault_result_flags) {
4085 		*kdp_fault_result_flags = fault_results.flags;
4086 	}
4087 
4088 	return result;
4089 }
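
/*
 * stackshot_find_phys() enforces a global fault-time budget: once the
 * cumulative time spent faulting reaches sfs_system_max_fault_time, the
 * sfs_stopped_faulting latch disables faulting for the remainder of the
 * stackshot (panic stackshots are exempt).  A generic sketch of the
 * latch-on-budget pattern (hypothetical names, illustration only):
 */
#if 0 /* illustration only */
#include <stdbool.h>
#include <stdint.h>

struct time_budget {
	uint64_t spent;
	uint64_t limit;
	bool     exhausted; /* latches true and is never reset mid-run */
};

static bool
budget_charge(struct time_budget *b, uint64_t cost)
{
	if (b->exhausted) {
		return false;        /* budget already exhausted */
	}
	b->spent += cost;
	if (b->spent >= b->limit) {
		b->exhausted = true; /* latch: no further expensive work */
	}
	return true;
}
#endif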
4090 
4091 /*
4092  * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
4093  * in order to:
4094  *   1. collect statistics on the number of pages faulted in
4095  *   2. stop faulting if the time spent faulting has exceeded the limit.
4096  */
4097 static boolean_t
4098 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4099 {
4100 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4101 	if (try_fault) {
4102 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4103 	}
4104 	return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
4105 }
4106 static boolean_t
4107 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4108 {
4109 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4110 	if (try_fault) {
4111 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4112 	}
4113 	return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
4114 }
4115 static int
4116 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4117 {
4118 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4119 	if (try_fault) {
4120 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4121 	}
4122 	return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
4123 }
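
/*
 * A minimal usage sketch for the wrappers above (illustration only):
 * chase a user-space pointer, then copy in the NUL-terminated string it
 * points at.  read_user_string() and its parameters are hypothetical.
 */
#if 0 /* illustration only */
static int
read_user_string(task_t task, uint64_t str_ptr_addr, char *buf, int buf_sz,
    boolean_t try_fault)
{
	uint64_t str_addr = 0;
	kdp_fault_result_flags_t fault_results = 0;

	/* First hop: read the pointer itself out of the task. */
	if (!stackshot_copyin_word(task, str_ptr_addr, &str_addr, try_fault,
	    &fault_results)) {
		return -1; /* hypothetical error convention */
	}
	/* Second hop: copy in the string the pointer refers to. */
	return stackshot_copyin_string(task, str_addr, buf, buf_sz, try_fault,
	    &fault_results);
}
#endif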
4124 
4125 kern_return_t
4126 do_stackshot(void *context)
4127 {
4128 #pragma unused(context)
4129 	kdp_snapshot++;
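	/* nonzero kdp_snapshot marks a stackshot in progress (cf. stackshot_active()) */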
4130 
4131 	stackshot_out_flags = stack_snapshot_flags;
4132 
4133 	stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid, &stackshot_out_flags);
4134 
4135 	kdp_snapshot--;
4136 	return stack_snapshot_ret;
4137 }
4138 
4139 kern_return_t
4140 do_panic_stackshot(void *context);
4141 
4142 kern_return_t
4143 do_panic_stackshot(void *context)
4144 {
4145 	kern_return_t ret = do_stackshot(context);
4146 	kern_return_t error = finalize_kcdata(stackshot_kcdata_p);
4147 
4148 	// Return ret if it is already an error; otherwise return the result of
4149 	// finalizing the kcdata.  Usually both are KERN_SUCCESS.
4150 	return (ret != KERN_SUCCESS) ? ret : error;
4151 }
4152 
4153 boolean_t
4154 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
4155 {
4156 	/* When the pthread kext puts a worker thread to sleep, it will
4157 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
4158 	 * struct. See parkit() in kern/kern_support.c in libpthread.
4159 	 */
4160 	return (thread->state & TH_WAIT) &&
4161 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
4162 }
4163 
4164 #if CONFIG_COALITIONS
4165 static void
4166 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
4167 {
4168 #pragma unused(i, coal)
4169 	unsigned int *coalition_count = (unsigned int*)arg;
4170 	(*coalition_count)++;
4171 }
4172 
4173 static void
4174 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
4175 {
4176 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
4177 		return;
4178 	}
4179 
4180 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
4181 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
4182 	task_t leader = TASK_NULL;
4183 	jcs->jcs_id = coalition_id(coal);
4184 	jcs->jcs_flags = 0;
4185 	jcs->jcs_thread_group = 0;
4186 
4187 	if (coalition_term_requested(coal)) {
4188 		jcs->jcs_flags |= kCoalitionTermRequested;
4189 	}
4190 	if (coalition_is_terminated(coal)) {
4191 		jcs->jcs_flags |= kCoalitionTerminated;
4192 	}
4193 	if (coalition_is_reaped(coal)) {
4194 		jcs->jcs_flags |= kCoalitionReaped;
4195 	}
4196 	if (coalition_is_privileged(coal)) {
4197 		jcs->jcs_flags |= kCoalitionPrivileged;
4198 	}
4199 
4200 #if CONFIG_THREAD_GROUPS
4201 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
4202 	if (thread_group) {
4203 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
4204 	}
4205 #endif /* CONFIG_THREAD_GROUPS */
4206 
4207 	leader = kdp_coalition_get_leader(coal);
4208 	if (leader) {
4209 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
4210 	} else {
4211 		jcs->jcs_leader_task_uniqueid = 0;
4212 	}
4213 }
4214 #endif /* CONFIG_COALITIONS */
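
/*
 * The two callbacks above follow stackshot's count-then-fill pattern: a
 * first pass counts elements so a right-sized array can be reserved in
 * the kcdata buffer, and a second pass fills that array in.  A generic
 * sketch of the pattern (hypothetical iterator and types, illustration
 * only):
 */
#if 0 /* illustration only */
typedef void (*iter_cb_t)(void *arg, int i, void *elem);
extern void iterate_elements(iter_cb_t cb, void *arg);  /* hypothetical */
extern void *reserve_slots(unsigned int n);             /* hypothetical */

static void
count_cb(void *arg, int i, void *elem)
{
	(void)i;
	(void)elem;
	(*(unsigned int *)arg)++;
}

static void
count_then_fill(iter_cb_t fill_cb)
{
	unsigned int count = 0;
	void *slots;

	iterate_elements(count_cb, &count); /* pass 1: size the array */
	slots = reserve_slots(count);       /* carve out space for `count` slots */
	iterate_elements(fill_cb, slots);   /* pass 2: fill it in */
}
#endif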
4215 
4216 #if CONFIG_THREAD_GROUPS
4217 static void
4218 stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
4219 {
4220 #pragma unused(i, tg)
4221 	unsigned int *n = (unsigned int*)arg;
4222 	(*n)++;
4223 }
4224 
4225 static void
4226 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
4227 {
4228 	struct thread_group_snapshot_v3 *thread_groups = arg;
4229 	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
4230 	const char *name = thread_group_get_name(tg);
4231 	uint32_t flags = thread_group_get_flags(tg);
4232 	tgs->tgs_id = thread_group_get_id(tg);
4233 	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
4234 	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
4235 	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
4236 	    sizeof(tgs->tgs_name_cont));
4237 	tgs->tgs_flags =
4238 	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT)     ? kThreadGroupEfficient     : 0) |
4239 	    ((flags & THREAD_GROUP_FLAGS_APPLICATION)   ? kThreadGroupApplication   : 0) |
4240 	    ((flags & THREAD_GROUP_FLAGS_CRITICAL)      ? kThreadGroupCritical      : 0) |
4241 	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT)   ? kThreadGroupBestEffort    : 0) |
4242 	    ((flags & THREAD_GROUP_FLAGS_UI_APP)        ? kThreadGroupUIApplication : 0) |
4243 	    ((flags & THREAD_GROUP_FLAGS_MANAGED)       ? kThreadGroupManaged       : 0) |
4244 	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers  : 0) |
4245 	    0;
4246 }
4247 #endif /* CONFIG_THREAD_GROUPS */
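
/*
 * Note that thread_group_snapshot_v3 stores the group name split across
 * two fixed-size fields (tgs_name, then tgs_name_cont), presumably so the
 * original field keeps its offset while longer names stay representable.
 * A standalone sketch of the split-copy (field sizes are assumptions,
 * illustration only):
 */
#if 0 /* illustration only */
#include <string.h>

#define NAME_PART      16 /* assumed sizeof(tgs_name) */
#define NAME_CONT_PART 16 /* assumed sizeof(tgs_name_cont) */

struct split_name {
	char name[NAME_PART];
	char name_cont[NAME_CONT_PART];
};

/* src must be at least NAME_PART + NAME_CONT_PART bytes long. */
static void
split_copy(struct split_name *dst, const char *src)
{
	memcpy(dst->name, src, sizeof(dst->name));
	memcpy(dst->name_cont, src + sizeof(dst->name), sizeof(dst->name_cont));
}
#endif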
4248 
4249 /* Determine if a thread has waitinfo that stackshot can provide */
4250 static int
4251 stackshot_thread_has_valid_waitinfo(thread_t thread)
4252 {
4253 	if (!(thread->state & TH_WAIT)) {
4254 		return 0;
4255 	}
4256 
4257 	switch (thread->block_hint) {
4258 	// If set to None or is a parked work queue, ignore it
4259 	case kThreadWaitParkedWorkQueue:
4260 	case kThreadWaitNone:
4261 		return 0;
4262 	// There is a short window where the pthread kext removes a thread
4263 	// from its ksyn wait queue before waking the thread up
4264 	case kThreadWaitPThreadMutex:
4265 	case kThreadWaitPThreadRWLockRead:
4266 	case kThreadWaitPThreadRWLockWrite:
4267 	case kThreadWaitPThreadCondVar:
4268 		return kdp_pthread_get_thread_kwq(thread) != NULL;
4269 	// All other cases are valid block hints if in a wait state
4270 	default:
4271 		return 1;
4272 	}
4273 }
4274 
4275 /* Determine if a thread has turnstileinfo that stackshot can provide */
4276 static int
4277 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
4278 {
4279 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
4280 
4281 	return stackshot_thread_has_valid_waitinfo(thread) &&
4282 	       ts != TURNSTILE_NULL;
4283 }
4284 
4285 static void
4286 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
4287 {
4288 	struct turnstile *ts;
4289 	struct ipc_service_port_label *ispl = NULL;
4290 
4291 	/* acquire turnstile information and store it in the stackshot */
4292 	ts = thread_get_waiting_turnstile(thread);
4293 	tsinfo->waiter = thread_tid(thread);
4294 	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
4295 	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
4296 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
4297 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
4298 	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
4299 }
4300 
4301 static void
4302 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
4303 {
4304 	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
4305 	struct ipc_service_port_label *ispl = NULL;
4306 
4307 	waitinfo->waiter        = thread_tid(thread);
4308 	waitinfo->wait_type     = thread->block_hint;
4309 	waitinfo->wait_flags    = 0;
4310 
4311 	switch (waitinfo->wait_type) {
4312 	case kThreadWaitKernelMutex:
4313 		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4314 		break;
4315 	case kThreadWaitPortReceive:
4316 		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
4317 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
4318 		break;
4319 	case kThreadWaitPortSend:
4320 		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
4321 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
4322 		break;
4323 	case kThreadWaitSemaphore:
4324 		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4325 		break;
4326 	case kThreadWaitUserLock:
4327 		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4328 		break;
4329 	case kThreadWaitKernelRWLockRead:
4330 	case kThreadWaitKernelRWLockWrite:
4331 	case kThreadWaitKernelRWLockUpgrade:
4332 		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4333 		break;
4334 	case kThreadWaitPThreadMutex:
4335 	case kThreadWaitPThreadRWLockRead:
4336 	case kThreadWaitPThreadRWLockWrite:
4337 	case kThreadWaitPThreadCondVar:
4338 		kdp_pthread_find_owner(thread, waitinfo_v1);
4339 		break;
4340 	case kThreadWaitWorkloopSyncWait:
4341 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
4342 		break;
4343 	case kThreadWaitOnProcess:
4344 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
4345 		break;
4346 	case kThreadWaitSleepWithInheritor:
4347 		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4348 		break;
4349 	case kThreadWaitEventlink:
4350 		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
4351 		break;
4352 	case kThreadWaitCompressor:
4353 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
4354 		break;
4355 	default:
4356 		waitinfo->owner = 0;
4357 		waitinfo->context = 0;
4358 		break;
4359 	}
4360 }
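
/*
 * stackshot_thread_wait_owner_info() hands the v2 record to v1-era owner
 * lookup helpers via a pointer cast; that is safe only because
 * thread_waitinfo_v2_t extends thread_waitinfo_t with trailing fields,
 * keeping the v1 layout as a prefix.  A minimal sketch of the technique
 * (hypothetical types, illustration only):
 */
#if 0 /* illustration only */
#include <stdint.h>

typedef struct record_v1 {
	uint64_t owner;
	uint64_t context;
} record_v1_t;

typedef struct record_v2 {
	uint64_t owner;       /* v1 fields first, in the same order ... */
	uint64_t context;
	uint32_t extra_flags; /* ... new fields are appended at the end */
} record_v2_t;

static void
fill_v1(record_v1_t *r)
{
	r->owner = 0;
	r->context = 0;
}

static void
fill_v2(record_v2_t *r)
{
	fill_v1((record_v1_t *)r); /* v1 prefix is layout-compatible */
	r->extra_flags = 0;
}
#endif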
4361