xref: /xnu-12377.81.4/osfmk/kern/kern_stackshot.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <mach/mach_types.h>
31 #include <mach/vm_param.h>
32 #include <mach/mach_vm.h>
33 #include <mach/clock_types.h>
34 #include <sys/code_signing.h>
35 #include <sys/errno.h>
36 #include <sys/stackshot.h>
37 #if defined(__arm64__)
38 #include <arm/cpu_internal.h>
39 #endif /* __arm64__ */
40 #ifdef IMPORTANCE_INHERITANCE
41 #include <ipc/ipc_importance.h>
42 #endif
43 #include <sys/appleapiopts.h>
44 #include <kern/debug.h>
45 #include <kern/block_hint.h>
46 #include <uuid/uuid.h>
47 
48 #include <kdp/kdp_dyld.h>
49 #include <kdp/kdp_en_debugger.h>
50 #include <kdp/processor_core.h>
51 #include <kdp/kdp_common.h>
52 
53 #include <libsa/types.h>
54 #include <libkern/version.h>
55 #include <libkern/section_keywords.h>
56 
57 #include <string.h> /* bcopy */
58 
59 #include <kern/kern_stackshot.h>
60 #include <kern/kcdata_private.h>
61 #include <kern/backtrace.h>
62 #include <kern/coalition.h>
63 #include <kern/epoch_sync.h>
64 #include <kern/exclaves_stackshot.h>
65 #include <kern/exclaves_inspection.h>
66 #include <kern/processor.h>
67 #include <kern/host_statistics.h>
68 #include <kern/counter.h>
69 #include <kern/thread.h>
70 #include <kern/thread_group.h>
71 #include <kern/task.h>
72 #include <kern/telemetry.h>
73 #include <kern/clock.h>
74 #include <kern/policy_internal.h>
75 #include <kern/socd_client.h>
76 #include <kern/startup.h>
77 #include <vm/pmap.h>
78 #include <vm/vm_map_xnu.h>
79 #include <vm/vm_kern_xnu.h>
80 #include <vm/vm_pageout.h>
81 #include <vm/vm_fault.h>
82 #include <vm/vm_shared_region_xnu.h>
83 #include <vm/vm_compressor_xnu.h>
84 #include <libkern/OSKextLibPrivate.h>
85 #include <os/log.h>
86 
87 #if HAS_MTE
88 #include <vm/vm_mteinfo_internal.h>
89 #endif /* HAS_MTE */
90 
91 
92 #ifdef CONFIG_EXCLAVES
93 #include <kern/exclaves.tightbeam.h>
94 #endif /* CONFIG_EXCLAVES */
95 
96 #include <kern/exclaves_test_stackshot.h>
97 
98 #include <libkern/coreanalytics/coreanalytics.h>
99 
100 #if defined(__x86_64__)
101 #include <i386/mp.h>
102 #include <i386/cpu_threads.h>
103 #endif
104 
105 #include <pexpert/pexpert.h>
106 
107 #if CONFIG_PERVASIVE_CPI
108 #include <kern/monotonic.h>
109 #endif /* CONFIG_PERVASIVE_CPI */
110 
111 #include <san/kasan.h>
112 
113 #if DEBUG || DEVELOPMENT
114 #define STACKSHOT_COLLECTS_DIAGNOSTICS 1
115 #define STACKSHOT_COLLECTS_LATENCY_INFO 1
116 #else
117 #define STACKSHOT_COLLECTS_DIAGNOSTICS 0
118 #define STACKSHOT_COLLECTS_LATENCY_INFO 0
119 #endif /* DEBUG || DEVELOPMENT */
120 
121 #define STACKSHOT_COLLECTS_RDAR_126582377_DATA 0
122 
123 #if defined(__AMP__)
124 #define STACKSHOT_NUM_WORKQUEUES 2
125 #else /* __AMP__ */
126 #define STACKSHOT_NUM_WORKQUEUES 1
127 #endif
128 
129 #if defined(__arm64__)
130 #define STACKSHOT_NUM_BUFFERS MAX_CPU_CLUSTERS
131 #else /* __arm64__ */
132 #define STACKSHOT_NUM_BUFFERS 1
133 #endif /* __arm64__ */
134 
135 /* The number of threads which will land a task in the hardest workqueue. */
136 #define STACKSHOT_HARDEST_THREADCOUNT 10
137 
138 TUNABLE_DEV_WRITEABLE(unsigned int, stackshot_single_thread, "stackshot_single_thread", 0);
139 
140 extern unsigned int not_in_kdp;
141 
142 /* indicate to the compiler that some accesses are unaligned */
143 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
144 
145 int kdp_snapshot                            = 0;
146 
147 #pragma mark ---Stackshot Struct Definitions---
148 
149 typedef struct linked_kcdata_descriptor {
150 	struct kcdata_descriptor          kcdata;
151 	struct linked_kcdata_descriptor  *next;
152 } * linked_kcdata_descriptor_t;
153 
154 struct stackshot_workitem {
155 	task_t                        sswi_task;
156 	linked_kcdata_descriptor_t    sswi_data; /* The kcdata for this task. */
157 	int                           sswi_idx;  /* The index of this job, used for ordering kcdata across multiple queues. */
158 };
159 
160 struct stackshot_workqueue {
161 	uint32_t _Atomic              sswq_num_items; /* Only modified by main CPU */
162 	uint32_t _Atomic              sswq_cur_item; /* Modified by all CPUs */
163 	size_t                        sswq_capacity; /* Constant after preflight */
164 	bool _Atomic                  sswq_populated; /* Only modified by main CPU */
165 	struct stackshot_workitem    *__counted_by(sswq_capacity) sswq_items;
166 };
167 
168 struct freelist_entry {
169 	struct freelist_entry        *fl_next; /* Next entry in the freelist */
170 	size_t                        fl_size; /* Size of the entry (must be >= sizeof(struct freelist_entry)) */
171 };
172 
173 struct stackshot_buffer {
174 	void                         *ssb_ptr; /* Base of buffer */
175 	size_t                        ssb_size;
176 	size_t _Atomic                ssb_used;
177 	struct freelist_entry        *ssb_freelist; /* First freelist entry */
178 	int _Atomic                   ssb_freelist_lock;
179 	size_t _Atomic                ssb_overhead; /* Total amount ever freed (even if re-allocated from freelist) */
180 };
181 
182 struct kdp_snapshot_args {
183 	int                           pid;
184 	void                         *buffer;
185 	struct kcdata_descriptor     *descriptor;
186 	uint32_t                      buffer_size;
187 	uint64_t                      flags;
188 	uint64_t                      since_timestamp;
189 	uint32_t                      pagetable_mask;
190 };
191 
192 /*
193  * Keep a simple cache of the most recent validation done at a page granularity
194  * to avoid the expensive software KVA-to-phys translation in the VM.
195  */
196 
197 struct _stackshot_validation_state {
198 	vm_offset_t last_valid_page_kva;
199 	size_t last_valid_size;
200 };
201 
202 /* CPU-local generation counts for PLH */
203 struct _stackshot_plh_gen_state {
204 	uint8_t                *pgs_gen;       /* last 'gen #' seen in each slot */
205 	int16_t                 pgs_curgen_min; /* min idx seen for this gen */
206 	int16_t                 pgs_curgen_max; /* max idx seen for this gen */
207 	uint8_t                 pgs_curgen;     /* current gen */
208 };
209 
210 /*
211  * For port labels, we have a small hash table we use to track the
212  * struct ipc_service_port_label pointers we see along the way.
213  * This structure encapsulates the global state.
214  *
215  * The hash table is insert-only, similar to "intern"ing strings.  It's
216  * only used and manipulated during the stackshot collection.  We use
217  * separate chaining, with the hash elements and chains being int16_t
218  * indexes into the parallel arrays, with -1 ending the chain.  Array indices are
219  * allocated using a bump allocator.
220  *
221  * The parallel arrays contain:
222  *      - plh_array[idx]	the pointer entered
223  *      - plh_chains[idx]	the hash chain
224  *      - plh_gen[idx]		the last 'generation #' seen
225  *
226  * Generation IDs are used to track entries looked up in the current
227  * task; 0 is never used, and the plh_gen array is cleared to 0 on
228  * rollover.
229  *
230  * The portlabel_ids we report externally are just the index in the array,
231  * plus 1 to avoid 0 as a value.  0 is NONE, -1 is UNKNOWN (e.g. there is
232  * one, but we ran out of space)
233  */
234 struct port_label_hash {
235 	int _Atomic             plh_lock;       /* lock for concurrent modifications to this plh */
236 	uint16_t                plh_size;       /* size of allocations; 0 disables tracking */
237 	uint16_t                plh_count;      /* count of used entries in plh_array */
238 	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
239 	int16_t                *plh_chains;    /* _size allocated */
240 	int16_t                *plh_hash;      /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
241 #if DEVELOPMENT || DEBUG
242 	/* statistics */
243 	uint32_t _Atomic        plh_lookups;    /* # lookups or inserts */
244 	uint32_t _Atomic        plh_found;
245 	uint32_t _Atomic        plh_found_depth;
246 	uint32_t _Atomic        plh_insert;
247 	uint32_t _Atomic        plh_insert_depth;
248 	uint32_t _Atomic        plh_bad;
249 	uint32_t _Atomic        plh_bad_depth;
250 	uint32_t _Atomic        plh_lookup_send;
251 	uint32_t _Atomic        plh_lookup_receive;
252 #define PLH_STAT_OP(...)    (void)(__VA_ARGS__)
253 #else /* DEVELOPMENT || DEBUG */
254 #define PLH_STAT_OP(...)    (void)(0)
255 #endif /* DEVELOPMENT || DEBUG */
256 };
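/*
 * Illustrative sketch of the "intern" operation described above, assuming a
 * hypothetical helper and hash function (the real lookup code lives further
 * down in this file): hash the pointer into plh_hash, walk the int16_t
 * chain, and bump-allocate a new slot on a miss.
 *
 *	static int16_t
 *	plh_example_intern(struct port_label_hash *plh,
 *	    struct ipc_service_port_label *ispl)
 *	{
 *		// Hypothetical hash: fold pointer bits into the table's range.
 *		uint16_t bucket = (uint16_t)(((uintptr_t)ispl >> 4) &
 *		    ((1u << STACKSHOT_PLH_SHIFT) - 1));
 *		for (int16_t idx = plh->plh_hash[bucket]; idx >= 0;
 *		    idx = plh->plh_chains[idx]) {
 *			if (plh->plh_array[idx] == ispl) {
 *				return idx;     // already interned
 *			}
 *		}
 *		if (plh->plh_count >= plh->plh_size) {
 *			return -1;              // table full: report UNKNOWN
 *		}
 *		int16_t idx = (int16_t)plh->plh_count++;        // bump allocator
 *		plh->plh_array[idx] = ispl;
 *		plh->plh_chains[idx] = plh->plh_hash[bucket];   // push onto chain
 *		plh->plh_hash[bucket] = idx;
 *		return idx;     // externally reported portlabel_id is idx + 1
 *	}
 */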
257 
258 #define plh_lock(plh) while(!os_atomic_cmpxchg(&(plh)->plh_lock, 0, 1, acquire)) { loop_wait(); }
259 #define plh_unlock(plh) os_atomic_store(&(plh)->plh_lock, 0, release);
260 
261 #define STACKSHOT_PLH_SHIFT    7
262 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
263 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
264 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
265 
266 struct stackshot_cpu_context {
267 	bool                               scc_can_work; /* Whether the CPU can do more stackshot work */
268 	bool                               scc_did_work; /* Whether the CPU actually did any stackshot work */
269 	linked_kcdata_descriptor_t         scc_kcdata_head; /* See `linked_kcdata_alloc_callback` */
270 	linked_kcdata_descriptor_t         scc_kcdata_tail; /* See `linked_kcdata_alloc_callback` */
271 	uintptr_t                         *scc_stack_buffer; /* A buffer for stacktraces. */
272 	struct stackshot_fault_stats       scc_fault_stats;
273 	struct _stackshot_validation_state scc_validation_state;
274 	struct _stackshot_plh_gen_state    scc_plh_gen;
275 };
276 
277 /*
278  * When directly modifying the stackshot state, always use the macros below to
279  * work with this enum - the higher-order bits are used to store an error code
280  * in the case of SS_ERRORED.
281  *
282  *        +------------------------------------+-------------------+
283  *        |                                    |                   |
284  *        v                                    |                   |
285  * +-------------+     +----------+     +------------+     +------------+
286  * | SS_INACTIVE |---->| SS_SETUP |---->| SS_RUNNING |---->| SS_ERRORED |
287  * +-------------+     +----------+     +------------+     +------------+
288  *                         |  |                |                ^  |
289  *                         |  +----------------|----------------+  |
290  * +-------------+         |                   |                   |
291  * | SS_PANICKED |<--------+-------------------+                   |
292  * +-------------+                                                 |
293  *        ^                                                        |
294  *        |                                                        |
295  *        +--------------------------------------------------------+
296  */
297 __enum_closed_decl(stackshot_state_t, uint, {
298 	SS_INACTIVE = 0x0, /* -> SS_SETUP */
299 	SS_SETUP    = 0x1, /* -> SS_RUNNING, SS_ERRORED, SS_PANICKED */
300 	SS_RUNNING  = 0x2, /* -> SS_ERRORED, SS_PANICKED, SS_INACTIVE */
301 	SS_ERRORED  = 0x3, /* -> SS_INACTIVE, SS_PANICKED */
302 	SS_PANICKED = 0x4, /* -> N/A */
303 	_SS_COUNT
304 });
305 
306 static_assert(_SS_COUNT <= 0x5);
307 /* Get the stackshot state ID from a stackshot_state_t. */
308 #define SS_STATE(state) ((state) & 0x7u)
309 /* Get the error code from a stackshot_state_t. */
310 #define SS_ERRCODE(state) ((state) >> 3)
311 /* Make a stackshot error state with a given code. */
312 #define SS_MKERR(code) (((code) << 3) | SS_ERRORED)
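/*
 * Worked example of the encoding above (a sketch, not code used below):
 *
 *	stackshot_state_t s = SS_MKERR(KERN_INSUFFICIENT_BUFFER_SIZE);
 *	assert(SS_STATE(s) == SS_ERRORED);
 *	assert(SS_ERRCODE(s) == KERN_INSUFFICIENT_BUFFER_SIZE);
 *
 * Non-error states occupy only the low three bits, so SS_ERRCODE of any
 * state other than SS_ERRORED is 0.
 */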
313 
314 struct stackshot_context {
315 	/* Constants & Arguments */
316 	struct kdp_snapshot_args      sc_args;
317 	int                           sc_calling_cpuid;
318 	int                           sc_main_cpuid;
319 	bool                          sc_enable_faulting;
320 	uint64_t                      sc_microsecs; /* Timestamp */
321 	bool                          sc_panic_stackshot;
322 	size_t                        sc_min_kcdata_size;
323 	bool                          sc_is_singlethreaded;
324 
325 	/* State & Errors */
326 	stackshot_state_t _Atomic     sc_state; /* Only modified by calling CPU, main CPU, or panicking CPU. See comment above type definition for details. */
327 	kern_return_t                 sc_retval; /* The return value of the main thread */
328 	uint32_t _Atomic              sc_cpus_working;
329 
330 	/* KCData */
331 	linked_kcdata_descriptor_t    sc_pretask_kcdata;
332 	linked_kcdata_descriptor_t    sc_posttask_kcdata;
333 	kcdata_descriptor_t           sc_finalized_kcdata;
334 
335 	/* Buffers & Queues */
336 	struct stackshot_buffer       __counted_by(sc_num_buffers) sc_buffers[STACKSHOT_NUM_BUFFERS];
337 	size_t                        sc_num_buffers;
338 	struct stackshot_workqueue    __counted_by(STACKSHOT_NUM_WORKQUEUES) sc_workqueues[STACKSHOT_NUM_WORKQUEUES];
339 	struct port_label_hash        sc_plh;
340 
341 	/* Statistics */
342 	struct stackshot_duration_v2  sc_duration;
343 	uint32_t                      sc_bytes_traced;
344 	uint32_t                      sc_bytes_uncompressed;
345 #if STACKSHOT_COLLECTS_LATENCY_INFO
346 	struct stackshot_latency_collection_v2 sc_latency;
347 #endif
348 };
349 
350 #define STACKSHOT_DEBUG_TRACEBUF_SIZE 16
351 
352 struct stackshot_trace_entry {
353 	int               sste_line_no;
354 	uint64_t          sste_timestamp;
355 	mach_vm_address_t sste_data;
356 };
357 
358 struct stackshot_trace_buffer {
359 	uint64_t                     sstb_last_trace_timestamp;
360 	size_t                       sstb_tail_idx;
361 	size_t                       sstb_size;
362 	struct stackshot_trace_entry __counted_by(STACKSHOT_DEBUG_TRACEBUF_SIZE) sstb_entries[STACKSHOT_DEBUG_TRACEBUF_SIZE];
363 };
364 
365 #pragma mark ---Stackshot State and Data---
366 
367 /*
368  * Two stackshot states, one for panic and one for normal.
369  * That way, we can take a stackshot during a panic without clobbering state.
370  */
371 #define STACKSHOT_CTX_IDX_NORMAL 0
372 #define STACKSHOT_CTX_IDX_PANIC  1
373 size_t cur_stackshot_ctx_idx   = STACKSHOT_CTX_IDX_NORMAL;
374 struct stackshot_context stackshot_contexts[2] = {{0}, {0}};
375 #define stackshot_ctx (stackshot_contexts[cur_stackshot_ctx_idx])
376 #define stackshot_args (stackshot_ctx.sc_args)
377 #define stackshot_flags (stackshot_args.flags)
378 
379 static struct {
380 	uint64_t last_abs_start;      /* start time of last stackshot */
381 	uint64_t last_abs_end;        /* end time of last stackshot */
382 	uint64_t stackshots_taken;    /* total stackshots taken since boot */
383 	uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
384 } stackshot_stats = { 0 };
385 
386 #if STACKSHOT_COLLECTS_LATENCY_INFO
387 static struct stackshot_latency_cpu PERCPU_DATA(stackshot_cpu_latency_percpu);
388 #define stackshot_cpu_latency (*PERCPU_GET(stackshot_cpu_latency_percpu))
389 #endif
390 
391 static struct stackshot_cpu_context PERCPU_DATA(stackshot_cpu_ctx_percpu);
392 #define stackshot_cpu_ctx (*PERCPU_GET(stackshot_cpu_ctx_percpu))
393 
394 static struct kcdata_descriptor PERCPU_DATA(stackshot_kcdata_percpu);
395 #define stackshot_kcdata_p (PERCPU_GET(stackshot_kcdata_percpu))
396 
397 #if STACKSHOT_COLLECTS_LATENCY_INFO
398 static bool collect_latency_info = true;
399 #endif
400 
401 static uint64_t stackshot_max_fault_time;
402 
403 #if STACKSHOT_COLLECTS_DIAGNOSTICS
404 static struct stackshot_trace_buffer PERCPU_DATA(stackshot_trace_buffer);
405 #endif
406 
407 #pragma mark ---Stackshot Global State---
408 
409 uint32_t stackshot_estimate_adj = 25; /* experiment factor: 0-100, adjust our estimate up by this amount */
410 
411 static uint32_t stackshot_initial_estimate;
412 static uint32_t stackshot_initial_estimate_adj;
413 static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
414 static unaligned_u64 * stackshot_duration_outer;
415 static uint64_t stackshot_tries;
416 
417 void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
418 int kernel_stackshot_buf_size = 0;
419 
420 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
421 
422 #if CONFIG_EXCLAVES
423 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
424 static size_t stackshot_exclave_inspect_ctid_count = 0;
425 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
426 
427 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
428 #endif /* CONFIG_EXCLAVES */
429 
430 #if DEBUG || DEVELOPMENT
431 TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
432 #else
433 const bool disable_exclave_stackshot = false;
434 #endif
435 
436 #pragma mark ---Stackshot Static Function Declarations---
437 
438 __private_extern__ void stackshot_init( void );
439 static boolean_t        memory_iszero(void *addr, size_t size);
440 static void             stackshot_cpu_do_work(void);
441 static kern_return_t    stackshot_finalize_kcdata(void);
442 static kern_return_t    stackshot_finalize_singlethreaded_kcdata(void);
443 static kern_return_t    stackshot_collect_kcdata(void);
444 static int              kdp_stackshot_kcdata_format();
445 static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
446 #if HAS_MTE
447 static kern_return_t    stackshot_mteinfo_snapshot(kcdata_descriptor_t data);
448 #endif
449 static vm_offset_t      stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
450 static boolean_t        stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
451 static int              stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
452 static boolean_t        stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
453 static uint64_t         proc_was_throttled_from_task(task_t task);
454 static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
455 static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
456 static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
457 static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);
458 static uint32_t         get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj, uint64_t trace_flags, pid_t target_pid);
459 static kern_return_t    kdp_snapshot_preflight_internal(struct kdp_snapshot_args args);
460 
461 #if CONFIG_COALITIONS
462 static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
463 static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
464 #endif /* CONFIG_COALITIONS */
465 
466 #if CONFIG_THREAD_GROUPS
467 static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
468 static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
469 #endif /* CONFIG_THREAD_GROUPS */
470 
471 extern uint64_t         workqueue_get_task_ss_flags_from_pwq_state_kdp(void *proc);
472 
473 static kcdata_descriptor_t linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size);
474 
475 #pragma mark ---Stackshot Externs---
476 
477 struct proc;
478 extern int              proc_pid(struct proc *p);
479 extern uint64_t         proc_uniqueid(void *p);
480 extern uint64_t         proc_was_throttled(void *p);
481 extern uint64_t         proc_did_throttle(void *p);
482 extern int              proc_exiting(void *p);
483 extern int              proc_in_teardown(void *p);
484 static uint64_t         proc_did_throttle_from_task(task_t task);
485 extern void             proc_name_kdp(struct proc *p, char * buf, int size);
486 extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
487 extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
488 extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
489 extern uint64_t         proc_getcsflags_kdp(void * p);
490 extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
491 extern uint32_t         proc_getuid(proc_t);
492 extern uint32_t         proc_getgid(proc_t);
493 extern void             proc_memstat_data_kdp(void *p, int32_t *current_memlimit, int32_t *prio_effective, int32_t *prio_requested, int32_t *prio_assertion);
494 extern int              memorystatus_get_pressure_status_kdp(void);
495 extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit, boolean_t *is_active, boolean_t *is_managed, boolean_t *has_assertion);
496 extern void             panic_stackshot_release_lock(void);
497 
498 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
499 
500 #if CONFIG_TELEMETRY
501 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
502 #endif /* CONFIG_TELEMETRY */
503 
504 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
505 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
506 
507 static size_t stackshot_plh_est_size(void);
508 
509 #if CONFIG_EXCLAVES
510 static kern_return_t collect_exclave_threads(uint64_t);
511 static kern_return_t stackshot_setup_exclave_waitlist(void);
512 static void stackshot_cleanup_exclave_waitlist(void);
513 #endif
514 
515 /*
516  * Validates that the given address for a word is both a valid page and has
517  * default caching attributes for the current map.
518  */
519 bool machine_trace_thread_validate_kva(vm_offset_t);
520 /*
521  * Validates a region that stackshot will potentially inspect.
522  */
523 static bool _stackshot_validate_kva(vm_offset_t, size_t);
524 /*
525  * Must be called whenever stackshot is re-driven.
526  */
527 static void _stackshot_validation_reset(void);
528 /*
529  * A kdp-safe strlen() call.  Returns:
530  *      -1 if we reach maxlen or a bad address before the end of the string, or
531  *      strlen(s)
532  */
533 static long _stackshot_strlen(const char *s, size_t maxlen);
534 
535 #define MAX_FRAMES 1000
536 #define STACKSHOT_PAGETABLE_BUFSZ 4000
537 #define MAX_LOADINFOS 500
538 #define MAX_DYLD_COMPACTINFO (20 * 1024)  // max bytes of compactinfo to include per proc/shared region
539 #define TASK_IMP_WALK_LIMIT 20
540 
541 typedef struct thread_snapshot *thread_snapshot_t;
542 typedef struct task_snapshot *task_snapshot_t;
543 
544 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
545 extern kdp_send_t    kdp_en_send_pkt;
546 #endif
547 
548 /*
549  * Stackshot locking and other defines.
550  */
551 LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
552 LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
553 
554 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
555 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
556 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
557 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
558 
559 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
560 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
561 
562 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
563 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
564 
565 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
566 
567 /*
568  * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
569  * for non-panic stackshots where faulting is requested.
570  */
571 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
572 
573 
574 #ifndef ROUNDUP
575 #define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
576 #endif
577 
578 #define STACKSHOT_QUEUE_LABEL_MAXSIZE  64
579 
580 #pragma mark ---Stackshot Useful Macros---
581 
582 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
583 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
584 /*
585  * Use of the kcd_exit_on_error(action) macro requires a local
586  * 'kern_return_t error' variable and 'error_exit' label.
587  */
588 #define kcd_exit_on_error(action)                      \
589 	do {                                               \
590 	    if (KERN_SUCCESS != (error = (action))) {      \
591 	        STACKSHOT_TRACE(error);                    \
592 	        if (error == KERN_RESOURCE_SHORTAGE) {     \
593 	            error = KERN_INSUFFICIENT_BUFFER_SIZE; \
594 	        }                                          \
595 	        goto error_exit;                           \
596 	    }                                              \
597 	} while (0); /* end kcd_exit_on_error */
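/*
 * Illustrative usage of kcd_exit_on_error (a sketch; callers below follow
 * this shape, with kcdata_add_uint64_with_description standing in for any
 * kcdata call that can fail):
 *
 *	kern_return_t error = KERN_SUCCESS;
 *
 *	kcd_exit_on_error(kcdata_add_uint64_with_description(kcd, value,
 *	    "example"));
 *	...
 *
 *	error_exit:
 *		return error;
 */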
598 
599 #if defined(__arm64__)
600 #define loop_wait_noguard() __builtin_arm_wfe()
601 #elif defined(__x86_64__)
602 #define loop_wait_noguard() __builtin_ia32_pause()
603 #else
604 #define loop_wait_noguard()
605 #endif /* __x86_64__ */
606 
607 #define loop_wait() { loop_wait_noguard(); stackshot_panic_guard(); }
608 
609 static inline void stackshot_panic_guard(void);
610 
611 static __attribute__((noreturn, noinline)) void
612 stackshot_panic_spin(void)
613 {
614 	if (stackshot_cpu_ctx.scc_can_work) {
615 		stackshot_cpu_ctx.scc_can_work = false;
616 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
617 	}
618 	if (stackshot_ctx.sc_calling_cpuid == cpu_number()) {
619 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, acquire) != 0) {
620 			loop_wait_noguard();
621 		}
622 		panic_stackshot_release_lock();
623 	}
624 	while (1) {
625 		loop_wait_noguard();
626 	}
627 }
628 
629 /**
630  * Immediately aborts if another CPU panicked during the stackshot.
631  */
632 static inline void
633 stackshot_panic_guard(void)
634 {
635 	if (__improbable(os_atomic_load(&stackshot_ctx.sc_state, relaxed) == SS_PANICKED)) {
636 		stackshot_panic_spin();
637 	}
638 }
639 
640 /*
641  * Signal that we panicked during a stackshot by setting an atomic flag and
642  * waiting for others to coalesce before continuing the panic. Other CPUs will
643  * spin on this as soon as they see it set in order to prevent multiple
644  * concurrent panics. The calling CPU (i.e. the one holding the debugger lock)
645  * will release it for us in `stackshot_panic_spin` so we can continue
646  * panicking.
647  *
648  * This is called from panic_trap_to_debugger.
649  */
650 void
651 stackshot_cpu_signal_panic(void)
652 {
653 	stackshot_state_t o_state;
654 	if (stackshot_active()) {
655 		/* Check if someone else panicked before we did. */
656 		o_state = os_atomic_xchg(&stackshot_ctx.sc_state, SS_PANICKED, seq_cst);
657 		if (o_state == SS_PANICKED) {
658 			stackshot_panic_spin();
659 		}
660 
661 		/* We're the first CPU to panic - wait for everyone to coalesce. */
662 		if (stackshot_cpu_ctx.scc_can_work) {
663 			stackshot_cpu_ctx.scc_can_work = false;
664 			os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
665 		}
666 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
667 			loop_wait_noguard();
668 		}
669 	}
670 }
671 
672 /*
673  * Sets the stackshot state to SS_ERRORED along with the error code.
674  * Only works if the current state is SS_RUNNING or SS_SETUP.
675  */
676 static inline void
677 stackshot_set_error(kern_return_t error)
678 {
679 	stackshot_state_t cur_state;
680 	stackshot_state_t err_state = SS_MKERR(error);
681 	if (__improbable(!os_atomic_cmpxchgv(&stackshot_ctx.sc_state, SS_RUNNING, err_state, &cur_state, seq_cst))) {
682 		if (cur_state == SS_SETUP) {
683 			os_atomic_cmpxchg(&stackshot_ctx.sc_state, SS_SETUP, err_state, seq_cst);
684 		} else {
685 			/* Our state is something other than SS_RUNNING or SS_SETUP... Check for panic. */
686 			stackshot_panic_guard();
687 		}
688 	}
689 }
690 
691 /* Returns an error code if the current stackshot context has errored out.
692  * Also functions as a panic guard.
693  */
694 __result_use_check
695 static inline kern_return_t
696 stackshot_status_check(void)
697 {
698 	stackshot_state_t state = os_atomic_load(&stackshot_ctx.sc_state, relaxed);
699 
700 	/* Check for panic */
701 	if (__improbable(SS_STATE(state) == SS_PANICKED)) {
702 		stackshot_panic_spin();
703 	}
704 
705 	/* Check for error */
706 	if (__improbable(SS_STATE(state) == SS_ERRORED)) {
707 		kern_return_t err = SS_ERRCODE(state);
708 		assert(err != KERN_SUCCESS); /* SS_ERRORED should always store an associated error code. */
709 		return err;
710 	}
711 
712 	return KERN_SUCCESS;
713 }
714 
715 #pragma mark ---Stackshot Tracing---
716 
717 #if STACKSHOT_COLLECTS_DIAGNOSTICS
718 static void
719 stackshot_trace(int line_no, mach_vm_address_t data)
720 {
721 	struct stackshot_trace_buffer *buffer = PERCPU_GET(stackshot_trace_buffer);
722 	buffer->sstb_entries[buffer->sstb_tail_idx] = (struct stackshot_trace_entry) {
723 		.sste_line_no = line_no,
724 		.sste_timestamp = mach_continuous_time(),
725 		.sste_data = data
726 	};
727 	buffer->sstb_tail_idx = (buffer->sstb_tail_idx + 1) % STACKSHOT_DEBUG_TRACEBUF_SIZE;
728 	buffer->sstb_size = MIN(buffer->sstb_size + 1, STACKSHOT_DEBUG_TRACEBUF_SIZE);
729 }
730 #define STACKSHOT_TRACE(data) stackshot_trace(__LINE__, (mach_vm_address_t) (data))
731 
732 #else /* STACKSHOT_COLLECTS_DIAGNOSTICS */
733 #define STACKSHOT_TRACE(data) ((void) data)
734 #endif /* !STACKSHOT_COLLECTS_DIAGNOSTICS */
735 
736 #pragma mark ---Stackshot Buffer Management---
737 
738 #define freelist_lock(buffer) while(!os_atomic_cmpxchg(&buffer->ssb_freelist_lock, 0, 1, acquire)) { loop_wait(); }
739 #define freelist_unlock(buffer) os_atomic_store(&buffer->ssb_freelist_lock, 0, release);
740 
741 /**
742  * Allocates some data from the shared stackshot buffer freelist.
743  * This should not be used directly; it is a last resort if we run out of space.
744  */
745 static void *
746 stackshot_freelist_alloc(
747 	size_t size,
748 	struct stackshot_buffer *buffer,
749 	kern_return_t *error)
750 {
751 	struct freelist_entry **cur_freelist, **best_freelist = NULL, *ret = NULL;
752 
753 	freelist_lock(buffer);
754 
755 	cur_freelist = &buffer->ssb_freelist;
756 
757 	while (*cur_freelist != NULL) {
758 		if (((*cur_freelist)->fl_size >= size) && ((best_freelist == NULL) || ((*best_freelist)->fl_size > (*cur_freelist)->fl_size))) {
759 			best_freelist = cur_freelist;
760 			if ((*best_freelist)->fl_size == size) {
761 				break;
762 			}
763 		}
764 		cur_freelist = &((*cur_freelist)->fl_next);
765 	}
766 
767 	/* If we found a freelist entry, update the freelist */
768 	if (best_freelist != NULL) {
769 		os_atomic_sub(&buffer->ssb_overhead, size, relaxed);
770 		ret = *best_freelist;
771 
772 		/* If there's enough unused space at the end of this entry, we should make a new one */
773 		if (((*best_freelist)->fl_size - size) > sizeof(struct freelist_entry)) {
774 			struct freelist_entry *new_freelist = (struct freelist_entry*) ((mach_vm_address_t) *best_freelist + size);
775 			*new_freelist = (struct freelist_entry) {
776 				.fl_next = (*best_freelist)->fl_next,
777 				.fl_size = (*best_freelist)->fl_size - size
778 			};
779 			(*best_freelist)->fl_next = new_freelist;
780 		}
781 
782 		/* Update previous entry with next or new entry */
783 		*best_freelist = (*best_freelist)->fl_next;
784 	}
785 
786 	freelist_unlock(buffer);
787 
788 	if (error != NULL) {
789 		if (ret == NULL) {
790 			*error = KERN_INSUFFICIENT_BUFFER_SIZE;
791 		} else {
792 			*error = KERN_SUCCESS;
793 		}
794 	}
795 
796 	return ret;
797 }
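/*
 * Worked example of the best-fit split above, assuming a 64-bit build where
 * sizeof(struct freelist_entry) == 16: a 64-byte allocation from a 96-byte
 * entry leaves 32 bytes, which is enough to hold a header, so a new 32-byte
 * entry is spliced in at the old entry's tail. A 72-byte entry would be
 * handed out whole, since the 8-byte remainder cannot hold a header.
 */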
798 
799 /**
800  * Allocates some data from the shared stackshot buffer.
801  * Should not be used directly - see the `stackshot_alloc` and
802  * `stackshot_alloc_arr` macros.
803  */
804 static void *
805 stackshot_buffer_alloc(
806 	size_t size,
807 	struct stackshot_buffer *buffer,
808 	kern_return_t *error)
809 {
810 	size_t o_used, new_used;
811 
812 	stackshot_panic_guard();
813 	assert(!stackshot_ctx.sc_is_singlethreaded);
814 	assert(buffer->ssb_ptr != NULL);
815 
816 	os_atomic_rmw_loop(&buffer->ssb_used, o_used, new_used, relaxed, {
817 		new_used = o_used + size;
818 		if (new_used > buffer->ssb_size) {
819 		        os_atomic_rmw_loop_give_up(return stackshot_freelist_alloc(size, buffer, error));
820 		}
821 	});
822 
823 	if (error != NULL) {
824 		*error = KERN_SUCCESS;
825 	}
826 
827 	return (void*) ((mach_vm_address_t) buffer->ssb_ptr + o_used);
828 }
829 
830 /**
831  * Finds the best stackshot buffer to use (prefer our cluster's buffer)
832  * and allocates from it.
833  * Should not be used directly - see the `stackshot_alloc` and
834  * `stackshot_alloc_arr` macros.
835  */
836 __result_use_check
837 static void *
838 stackshot_best_buffer_alloc(size_t size, kern_return_t *error)
839 {
840 #if defined(__AMP__)
841 	kern_return_t err;
842 	int           my_cluster;
843 	void         *ret = NULL;
844 #endif /* __AMP__ */
845 
846 #if STACKSHOT_COLLECTS_LATENCY_INFO
847 	stackshot_cpu_latency.total_buf += size;
848 #endif
849 
850 #if defined(__AMP__)
851 	/* First, try our cluster's buffer */
852 	my_cluster = cpu_cluster_id();
853 	ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[my_cluster], &err);
854 
855 	/* Try other buffers now. */
856 	if (err != KERN_SUCCESS) {
857 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
858 			if ((buf_idx == my_cluster) || (stackshot_ctx.sc_buffers[buf_idx].ssb_ptr == NULL)) {
859 				continue;
860 			}
861 
862 			ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[buf_idx], &err);
863 			if (err == KERN_SUCCESS) {
864 #if STACKSHOT_COLLECTS_LATENCY_INFO
865 				stackshot_cpu_latency.intercluster_buf_used += size;
866 #endif
867 				break;
868 			}
869 		}
870 	}
871 
872 	if (error != NULL) {
873 		*error = err;
874 	}
875 
876 	return ret;
877 #else /* __AMP__ */
878 	return stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[0], error);
879 #endif /* !__AMP__ */
880 }
881 
882 /**
883  * Frees some data from the shared stackshot buffer and adds it to the freelist.
884  */
885 static void
886 stackshot_buffer_free(
887 	void *ptr,
888 	struct stackshot_buffer *buffer,
889 	size_t size)
890 {
891 	stackshot_panic_guard();
892 
893 	/* This should never be called during a singlethreaded stackshot. */
894 	assert(!stackshot_ctx.sc_is_singlethreaded);
895 
896 	os_atomic_add(&buffer->ssb_overhead, size, relaxed);
897 
898 	/* Make sure we have enough space for the freelist entry */
899 	if (size < sizeof(struct freelist_entry)) {
900 		return;
901 	}
902 
903 	freelist_lock(buffer);
904 
905 	/* Create new freelist entry and push it to the front of the list */
906 	*((struct freelist_entry*) ptr) = (struct freelist_entry) {
907 		.fl_size = size,
908 		.fl_next = buffer->ssb_freelist
909 	};
910 	buffer->ssb_freelist = ptr;
911 
912 	freelist_unlock(buffer);
913 }
914 
915 /**
916  * Allocates some data from the stackshot buffer. Uses the bump allocator in
917  * multithreaded mode and endalloc in singlethreaded.
918  * err must ALWAYS be nonnull.
919  * Should not be used directly - see the macros in kern_stackshot.h.
920  */
921 void *
922 stackshot_alloc_with_size(size_t size, kern_return_t *err)
923 {
924 	void *ptr;
925 	assert(err != NULL);
926 	assert(stackshot_active());
927 
928 	stackshot_panic_guard();
929 
930 	if (stackshot_ctx.sc_is_singlethreaded) {
931 		ptr = kcdata_endalloc(stackshot_kcdata_p, size);
932 		if (ptr == NULL) {
933 			*err = KERN_INSUFFICIENT_BUFFER_SIZE;
934 		}
935 	} else {
936 		ptr = stackshot_best_buffer_alloc(size, err);
937 		if (ptr == NULL) {
938 			/* We should always return an error if we return a null ptr */
939 			assert3u(*err, !=, KERN_SUCCESS);
940 		}
941 	}
942 
943 	return ptr;
944 }
945 
946 /**
947  * Initializes a new kcdata buffer somewhere in a linked kcdata list.
948  * Allocates a buffer for the kcdata from the shared stackshot buffer.
949  *
950  * See `linked_kcdata_alloc_callback` for the implementation details of
951  * linked kcdata for stackshot.
952  */
953 __result_use_check
954 static kern_return_t
955 linked_kcdata_init(
956 	linked_kcdata_descriptor_t descriptor,
957 	size_t min_size,
958 	unsigned int data_type,
959 	unsigned int flags)
960 {
961 	void              *buf_ptr;
962 	kern_return_t      error;
963 	size_t             buf_size = MAX(min_size, stackshot_ctx.sc_min_kcdata_size);
964 
965 	buf_ptr = stackshot_alloc_arr(uint8_t, buf_size, &error);
966 	if (error != KERN_SUCCESS) {
967 		return error;
968 	}
969 
970 	error = kcdata_memory_static_init(&descriptor->kcdata, (mach_vm_address_t) buf_ptr, data_type, buf_size, flags);
971 	if (error != KERN_SUCCESS) {
972 		return error;
973 	}
974 
975 	descriptor->kcdata.kcd_alloc_callback = linked_kcdata_alloc_callback;
976 
977 	return KERN_SUCCESS;
978 }
979 
980 static void
981 stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)
982 {
983 	/*
984 	 * If we have free space at the end of the kcdata, we can add it to the
985 	 * freelist. We always add to *our* cluster's freelist, no matter where
986 	 * the data was originally allocated.
987 	 *
988 	 * Important Note: We do not use kcdata_memory_get_used_bytes here because
989 	 * that includes extra space for the end tag (which we do not care about).
990 	 */
991 	int    buffer;
992 	size_t used_size = descriptor->kcd_addr_end - descriptor->kcd_addr_begin;
993 	size_t free_size = (descriptor->kcd_length - used_size);
994 	if (free_size > 0) {
995 #if defined(__arm64__)
996 		buffer = cpu_cluster_id();
997 #else /* __arm64__ */
998 		buffer = 0;
999 #endif /* !__arm64__ */
1000 		stackshot_buffer_free((void*) descriptor->kcd_addr_end, &stackshot_ctx.sc_buffers[buffer], free_size);
1001 		descriptor->kcd_length = used_size;
1002 	}
1003 }
1004 
1005 /**
1006  * The callback for linked kcdata, which is called when one of the kcdata
1007  * buffers runs out of space. This allocates a new kcdata descriptor &
1008  * buffer in the linked list and sets it up.
1009  *
1010  * When kcdata calls this callback, it takes the returned descriptor
1011  * and copies it to its own descriptor (which will be the per-cpu kcdata
1012  * descriptor, in the case of stackshot).
1013  *
1014  * --- Stackshot linked kcdata details ---
1015  * The way stackshot allocates kcdata buffers (in a non-panic context) is via
1016  * a basic bump allocator (see `stackshot_buffer_alloc`) and a linked list of
1017  * kcdata structures. The kcdata are allocated with a reasonable size based on
1018  * some system heuristics (or more if whatever is being pushed into the buffer
1019  * is larger). When the current kcdata buffer runs out of space, it calls this
1020  * callback, which allocates a new linked kcdata object at the tail of the
1021  * current list.
1022  *
1023  * The per-cpu `stackshot_kcdata_p` descriptor is the "tail" of the list, but
1024  * is not actually part of the linked list (this simplifies the
1025  * implementation, since it avoids changing every kcdata call and a bunch of
1026  * kcdata code: the current in-use descriptor is always in the same place
1027  * this way). When it is filled up and this callback is called, the
1028  * `stackshot_kcdata_p` descriptor is copied to the *actual* tail of the list
1029  * (in stackshot_cpu_ctx.scc_kcdata_tail), and a new linked kcdata struct is
1030  * allocated at the tail.
1031  */
1032 static kcdata_descriptor_t
1033 linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size)
1034 {
1035 	kern_return_t error;
1036 	linked_kcdata_descriptor_t new_kcdata = NULL;
1037 
1038 	/* This callback should ALWAYS be coming from our per-cpu kcdata. If not, something has gone horribly wrong.*/
1039 	stackshot_panic_guard();
1040 	assert(descriptor == stackshot_kcdata_p);
1041 
1042 	/* Free the unused space in the buffer and copy it to the tail of the linked kcdata list. */
1043 	stackshot_kcdata_free_unused(descriptor);
1044 	stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *descriptor;
1045 
1046 	/* Allocate another linked_kcdata and initialize it. */
1047 	new_kcdata = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1048 	if (error != KERN_SUCCESS) {
1049 		return NULL;
1050 	}
1051 
1052 	/* It doesn't matter what we mark the data type as - we're throwing it away when we weave the data together anyway. */
1053 	error = linked_kcdata_init(new_kcdata, min_size, KCDATA_BUFFER_BEGIN_STACKSHOT, descriptor->kcd_flags);
1054 	if (error != KERN_SUCCESS) {
1055 		return NULL;
1056 	}
1057 
1058 	bzero(descriptor, sizeof(struct kcdata_descriptor));
1059 	stackshot_cpu_ctx.scc_kcdata_tail->next = new_kcdata;
1060 	stackshot_cpu_ctx.scc_kcdata_tail = new_kcdata;
1061 
1062 	return &new_kcdata->kcdata;
1063 }
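/*
 * Illustrative sketch of consuming a finished list (a hypothetical helper;
 * the real collection pass is stackshot_collect_kcdata): walk from the head
 * and sum the bytes actually written into each frame.
 *
 *	static size_t
 *	linked_kcdata_example_total_bytes(linked_kcdata_descriptor_t head)
 *	{
 *		size_t total = 0;
 *		for (linked_kcdata_descriptor_t cur = head; cur != NULL;
 *		    cur = cur->next) {
 *			total += cur->kcdata.kcd_addr_end -
 *			    cur->kcdata.kcd_addr_begin;
 *		}
 *		return total;
 *	}
 */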
1064 
1065 /**
1066  * Allocates a new linked kcdata list for the current CPU and sets it up.
1067  * If there was a previous linked kcdata descriptor, you should call
1068  * `stackshot_finalize_linked_kcdata` first, or otherwise save it somewhere.
1069  */
1070 __result_use_check
1071 static kern_return_t
1072 stackshot_new_linked_kcdata(void)
1073 {
1074 	kern_return_t error;
1075 
1076 	stackshot_panic_guard();
1077 	assert(!stackshot_ctx.sc_panic_stackshot);
1078 
1079 	stackshot_cpu_ctx.scc_kcdata_head = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1080 	if (error != KERN_SUCCESS) {
1081 		return error;
1082 	}
1083 
1084 	kcd_exit_on_error(linked_kcdata_init(stackshot_cpu_ctx.scc_kcdata_head, 0,
1085 	    KCDATA_BUFFER_BEGIN_STACKSHOT,
1086 	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER | KCFLAG_ALLOC_CALLBACK));
1087 
1088 	stackshot_cpu_ctx.scc_kcdata_tail = stackshot_cpu_ctx.scc_kcdata_head;
1089 	*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_head->kcdata;
1090 
1091 error_exit:
1092 	return error;
1093 }
1094 
1095 /**
1096  * Finalizes the current linked kcdata structure for the CPU by updating the
1097  * tail of the list with the per-cpu kcdata descriptor.
1098  */
1099 static void
1100 stackshot_finalize_linked_kcdata(void)
1101 {
1102 	stackshot_panic_guard();
1103 	assert(!stackshot_ctx.sc_panic_stackshot);
1104 	stackshot_kcdata_free_unused(stackshot_kcdata_p);
1105 	if (stackshot_cpu_ctx.scc_kcdata_tail != NULL) {
1106 		stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *stackshot_kcdata_p;
1107 	}
1108 	*stackshot_kcdata_p = (struct kcdata_descriptor){};
1109 }
1110 
1111 /*
1112  * Initialize the mutex governing access to the stack snapshot subsystem
1113  * and other stackshot related bits.
1114  */
1115 __private_extern__ void
1116 stackshot_init(void)
1117 {
1118 	mach_timebase_info_data_t timebase;
1119 
1120 	clock_timebase_info(&timebase);
1121 	stackshot_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
1122 
1123 	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
1124 
1125 	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
1126 }
1127 
1128 /*
1129  * Called with interrupts disabled after stackshot context has been
1130  * initialized.
1131  */
1132 static kern_return_t
1133 stackshot_trap(void)
1134 {
1135 	kern_return_t   rv;
1136 
1137 #if defined(__x86_64__)
1138 	/*
1139 	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
1140 	 * operation, it's essential to apply mutual exclusion to the other when one
1141 	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
1142 	 * capture CPUs.
1143 	 *
1144 	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
1145  * allowed, so we check to ensure that there is no rendezvous in progress before
1146 	 * trying to grab the lock (if there is, a deadlock will occur when we try to
1147 	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
1148 	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
1149 	 * a subordinate of the call chain within the mp rendezvous action, this flag will
1150 	 * be set and can be used to detect the inevitable deadlock that would occur
1151 	 * if this thread tried to grab the rendezvous lock.
1152 	 */
1153 
1154 	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
1155 		panic("Calling stackshot from a rendezvous is not allowed!");
1156 	}
1157 
1158 	mp_rendezvous_lock();
1159 #endif
1160 
1161 	stackshot_stats.last_abs_start = mach_absolute_time();
1162 	stackshot_stats.last_abs_end = 0;
1163 
1164 	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0, NULL);
1165 
1166 	stackshot_stats.last_abs_end = mach_absolute_time();
1167 	stackshot_stats.stackshots_taken++;
1168 	stackshot_stats.stackshots_duration += (stackshot_stats.last_abs_end - stackshot_stats.last_abs_start);
1169 
1170 #if defined(__x86_64__)
1171 	mp_rendezvous_unlock();
1172 #endif
1173 	return rv;
1174 }
1175 
1176 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
1177 void
1178 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
1179 {
1180 	STACKSHOT_SUBSYS_LOCK();
1181 	*last_abs_start = stackshot_stats.last_abs_start;
1182 	*last_abs_end = stackshot_stats.last_abs_end;
1183 	*count = stackshot_stats.stackshots_taken;
1184 	*total_duration = stackshot_stats.stackshots_duration;
1185 	STACKSHOT_SUBSYS_UNLOCK();
1186 }
1187 
1188 kern_return_t
1189 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
1190 {
1191 	kern_return_t error = KERN_SUCCESS;
1192 	boolean_t istate;
1193 	struct kdp_snapshot_args args;
1194 
1195 	args = (struct kdp_snapshot_args) {
1196 		.pid =               pid,
1197 		.buffer =            buf,
1198 		.buffer_size =       size,
1199 		.flags =             flags,
1200 		.since_timestamp =   delta_since_timestamp,
1201 		.pagetable_mask =    pagetable_mask
1202 	};
1203 
1204 #if DEVELOPMENT || DEBUG
1205 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1206 		return KERN_NOT_SUPPORTED;
1207 	}
1208 #endif
1209 	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
1210 		return KERN_INVALID_ARGUMENT;
1211 	}
1212 
1213 	/* zero caller's buffer to match KMA_ZERO in other path */
1214 	bzero(buf, size);
1215 
1216 	/* cap an individual stackshot to max_tracebuf_size */
1217 	if (size > max_tracebuf_size) {
1218 		size = max_tracebuf_size;
1219 	}
1220 
1221 	/* Serialize tracing */
1222 	if (flags & STACKSHOT_TRYLOCK) {
1223 		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
1224 			return KERN_LOCK_OWNED;
1225 		}
1226 	} else {
1227 		STACKSHOT_SUBSYS_LOCK();
1228 	}
1229 
1230 #if CONFIG_EXCLAVES
1231 	assert(!stackshot_exclave_inspect_ctids);
1232 #endif
1233 
1234 	stackshot_initial_estimate = 0;
1235 	stackshot_duration_prior_abs = 0;
1236 	stackshot_duration_outer = NULL;
1237 
1238 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
1239 	    flags, size, pid, delta_since_timestamp);
1240 
1241 	/* Prepare the compressor for a stackshot */
1242 	error = vm_compressor_kdp_init();
1243 	if (error != KERN_SUCCESS) {
1244 		return error;
1245 	}
1246 
1247 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
1248 	// Opportunistically collect reports of the rdar://126582377 failure.
1249 	// If the allocation doesn't succeed, or if another CPU "steals" the
1250 	// allocated event first, that is acceptable.
1251 	ca_event_t new_event = CA_EVENT_ALLOCATE_FLAGS(bad_stackshot_upper16, Z_NOWAIT);
1252 	if (new_event) {
1253 		if (os_atomic_cmpxchg(&rdar_126582377_event, NULL, new_event, relaxed) == 0) {
1254 			// Already set up, so free it
1255 			CA_EVENT_DEALLOCATE(new_event);
1256 		}
1257 	}
1258 #endif
1259 
1260 	istate = ml_set_interrupts_enabled(FALSE);
1261 	uint64_t time_start      = mach_absolute_time();
1262 
1263 	/* Emit a SOCD tracepoint that we are initiating a stackshot */
1264 	SOCD_TRACE_XNU_START(STACKSHOT);
1265 
1266 	/* Preload trace parameters */
1267 	error = kdp_snapshot_preflight_internal(args);
1268 
1269 	/*
1270 	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
1271 	 * the trace buffer
1272 	 */
1273 	if (error == KERN_SUCCESS) {
1274 		error = stackshot_trap();
1275 	}
1276 
1277 	uint64_t time_end = mach_absolute_time();
1278 
1279 	/* Emit a SOCD tracepoint that we have completed the stackshot */
1280 	SOCD_TRACE_XNU_END(STACKSHOT);
1281 
1282 	ml_set_interrupts_enabled(istate);
1283 
1284 #if CONFIG_EXCLAVES
1285 	/* stackshot trap should only finish successfully or with no pending Exclave threads */
1286 	assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1287 #endif
1288 
1289 	/*
1290 	 * Stackshot is no longer active.
1291 	 * (We have to do this here for the special interrupt disable timeout case to work)
1292 	 */
1293 	os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
1294 
1295 	/* Release kdp compressor buffers */
1296 	vm_compressor_kdp_teardown();
1297 
1298 	/* Collect multithreaded kcdata into one finalized buffer */
1299 	if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
1300 		error = stackshot_collect_kcdata();
1301 	}
1302 
1303 #if CONFIG_EXCLAVES
1304 	if (stackshot_exclave_inspect_ctids) {
1305 		if (error == KERN_SUCCESS) {
1306 			error = collect_exclave_threads(flags);
1307 		}
1308 		stackshot_cleanup_exclave_waitlist();
1309 	}
1310 #endif /* CONFIG_EXCLAVES */
1311 
1312 	if (error == KERN_SUCCESS) {
1313 		if (!stackshot_ctx.sc_is_singlethreaded) {
1314 			error = stackshot_finalize_kcdata();
1315 		} else {
1316 			error = stackshot_finalize_singlethreaded_kcdata();
1317 		}
1318 	}
1319 
1320 	if (stackshot_duration_outer) {
1321 		*stackshot_duration_outer = time_end - time_start;
1322 	}
1323 	*bytes_traced = kdp_stack_snapshot_bytes_traced();
1324 
1325 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
1326 	    error, (time_end - time_start), size, *bytes_traced);
1327 
1328 	STACKSHOT_SUBSYS_UNLOCK();
1329 	return error;
1330 }
1331 
1332 #if CONFIG_TELEMETRY
1333 kern_return_t
1334 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
1335 {
1336 	int error = KERN_FAILURE;
1337 	uint32_t bytes_traced = 0;
1338 
1339 	/*
1340 	 * "Flags" is actually treated as an enumeration; make sure only one value
1341 	 * is passed at a time.
1342 	 */
1343 	bool set_mark = flags & STACKSHOT_SET_MICROSTACKSHOT_MARK;
1344 	flags &= ~STACKSHOT_SET_MICROSTACKSHOT_MARK;
1345 	if (__builtin_popcount(flags) != 1) {
1346 		return KERN_INVALID_ARGUMENT;
1347 	}
1348 
1349 	/*
1350 	 * Ensure that there's space to copyout to.
1351 	 */
1352 	if (tracebuf == USER_ADDR_NULL || tracebuf_size == 0) {
1353 		return KERN_INVALID_ARGUMENT;
1354 	}
1355 
1356 	STACKSHOT_SUBSYS_LOCK();
1357 
1358 	switch (flags) {
1359 	case STACKSHOT_GET_KERNEL_MICROSTACKSHOT:
1360 		/*
1361 		 * Kernel samples consume from their buffer, so using a mark is the only
1362 		 * allowed option.
1363 		 */
1364 		if (!set_mark) {
1365 			error = KERN_INVALID_ARGUMENT;
1366 			break;
1367 		}
1368 		bytes_traced = tracebuf_size;
1369 		error = telemetry_kernel_gather(tracebuf, &bytes_traced);
1370 		*retval = (int)bytes_traced;
1371 		break;
1372 	case STACKSHOT_GET_MICROSTACKSHOT: {
1373 		if (tracebuf_size > max_tracebuf_size) {
1374 			error = KERN_INVALID_ARGUMENT;
1375 			break;
1376 		}
1377 
1378 		bytes_traced = tracebuf_size;
1379 		error = telemetry_gather(tracebuf, &bytes_traced, set_mark);
1380 		*retval = (int)bytes_traced;
1381 		break;
1382 	}
1383 	default:
1384 		error = KERN_NOT_SUPPORTED;
1385 		break;
1386 	}
1387 
1388 	STACKSHOT_SUBSYS_UNLOCK();
1389 	return error;
1390 }
1391 #endif /* CONFIG_TELEMETRY */
1392 
1393 /**
1394  * Grabs the next work item from the stackshot work queue.
1395  */
1396 static struct stackshot_workitem *
1397 stackshot_get_workitem(struct stackshot_workqueue *queue)
1398 {
1399 	uint32_t old_count, new_count;
1400 
1401 	/* note: this relies on give_up not performing the write, just bailing out immediately */
1402 	os_atomic_rmw_loop(&queue->sswq_cur_item, old_count, new_count, acq_rel, {
1403 		if (old_count >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1404 		        os_atomic_rmw_loop_give_up(return NULL);
1405 		}
1406 		new_count = old_count + 1;
1407 	});
1408 
1409 	return &queue->sswq_items[old_count];
1410 };
1411 
1412 /**
1413  * Puts an item on the appropriate stackshot work queue.
1414  * We don't need the lock for this, but only because it's
1415  * only called by one writer.
1416  *
1417  * @returns
1418  * KERN_SUCCESS if the item fit in the queue, KERN_INSUFFICIENT_BUFFER_SIZE if not.
1419  */
1420 static kern_return_t
1421 stackshot_put_workitem(struct stackshot_workitem item)
1422 {
1423 	struct stackshot_workqueue *queue;
1424 
1425 	/* Put in higher queue if task has more threads, with highest queue having >= STACKSHOT_HARDEST_THREADCOUNT threads */
1426 	size_t queue_idx = ((item.sswi_task->thread_count * (STACKSHOT_NUM_WORKQUEUES - 1)) / STACKSHOT_HARDEST_THREADCOUNT);
1427 	queue_idx = MIN(queue_idx, STACKSHOT_NUM_WORKQUEUES - 1);
1428 
1429 	queue = &stackshot_ctx.sc_workqueues[queue_idx];
1430 
1431 	size_t num_items = os_atomic_load(&queue->sswq_num_items, relaxed);
1432 
1433 	if (num_items >= queue->sswq_capacity) {
1434 		return KERN_INSUFFICIENT_BUFFER_SIZE;
1435 	}
1436 
1437 	queue->sswq_items[num_items] = item;
1438 	os_atomic_inc(&queue->sswq_num_items, release);
1439 
1440 	return KERN_SUCCESS;
1441 }
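/*
 * Worked example of the queue-index mapping above, with illustrative values
 * STACKSHOT_NUM_WORKQUEUES == 4 and STACKSHOT_HARDEST_THREADCOUNT == 100:
 * a task with 10 threads maps to (10 * 3) / 100 == 0, one with 50 threads to
 * (50 * 3) / 100 == 1, and one with 200 threads computes (200 * 3) / 100 == 6,
 * which the MIN() clamps to the highest queue, index 3.
 */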
1442 
1443 #define calc_num_linked_kcdata_frames(size, kcdata_size) (1 + ((size) - 1) / (kcdata_size))
1444 #define calc_linked_kcdata_size(size, kcdata_size) (calc_num_linked_kcdata_frames((size), (kcdata_size)) * ((kcdata_size) + sizeof(struct linked_kcdata_descriptor)))
1445 
1446 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
1447 #define TASK_SHARED_CACHE_AVG_SIZE (128) /* Average space consumed by task shared cache info */
1448 #define sizeof_if_traceflag(a, flag) (((trace_flags & (flag)) != 0) ? sizeof(a) : 0)
1449 
1450 #define FUDGED_SIZE(size, adj) (((size) * ((adj) + 100)) / 100)
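/*
 * Worked examples of the sizing macros above (illustrative numbers):
 *
 *	calc_num_linked_kcdata_frames(10000, 4096) == 1 + 9999 / 4096 == 3
 *	calc_linked_kcdata_size(10000, 4096)
 *	    == 3 * (4096 + sizeof(struct linked_kcdata_descriptor))
 *	FUDGED_SIZE(1000, 25) == (1000 * 125) / 100 == 1250	// +25%
 *
 * i.e. the frame count is a ceiling division, and every frame pays for one
 * descriptor of linkage overhead.
 */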
1451 
1452 /*
1453  * Return the estimated size of a single task (including threads)
1454  * in a stackshot with the given flags.
1455  */
1456 static uint32_t
1457 get_stackshot_est_tasksize(uint64_t trace_flags)
1458 {
1459 	size_t total_size;
1460 	size_t threads_per_task = (((threads_count + terminated_threads_count) - 1) / (tasks_count + terminated_tasks_count)) + 1;
1461 	size_t est_thread_size = sizeof(struct thread_snapshot_v4) + 42 * sizeof(uintptr_t);
1462 	size_t est_task_size = sizeof(struct task_snapshot_v3) +
1463 	    TASK_UUID_AVG_SIZE +
1464 	    TASK_SHARED_CACHE_AVG_SIZE +
1465 	    sizeof_if_traceflag(struct io_stats_snapshot, STACKSHOT_INSTRS_CYCLES) +
1466 	    sizeof_if_traceflag(uint32_t, STACKSHOT_ASID) +
1467 	    sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ, STACKSHOT_PAGE_TABLES) +
1468 	    sizeof_if_traceflag(struct instrs_cycles_snapshot_v2, STACKSHOT_INSTRS_CYCLES) +
1469 	    sizeof(struct stackshot_cpu_architecture) +
1470 	    sizeof(struct stackshot_task_codesigning_info);
1471 
1472 #if STACKSHOT_COLLECTS_LATENCY_INFO
1473 	if (collect_latency_info) {
1474 		est_thread_size += sizeof(struct stackshot_latency_thread);
1475 		est_task_size += sizeof(struct stackshot_latency_task);
1476 	}
1477 #endif
1478 
1479 	total_size = est_task_size + threads_per_task * est_thread_size;
1480 
1481 	return total_size;
1482 }
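/*
 * Example of the threads_per_task computation above: with 100 threads
 * (live + terminated) spread over 30 tasks (live + terminated),
 * ((100 - 1) / 30) + 1 == 4, a ceiling-style average that can never
 * round down to zero threads per task.
 */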
1483 
1484 /*
1485  * Return the estimated size of a stackshot based on the
1486  * number of currently running threads and tasks.
1487  *
1488  * adj is an adjustment in units of percentage
1489  */
1490 static uint32_t
1491 get_stackshot_estsize(
1492 	uint32_t prev_size_hint,
1493 	uint32_t adj,
1494 	uint64_t trace_flags,
1495 	pid_t target_pid)
1496 {
1497 	vm_size_t thread_and_task_total;
1498 	uint64_t  size;
1499 	uint32_t  estimated_size;
1500 	bool      process_scoped = ((target_pid != -1) && ((trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0));
1501 
1502 	/*
1503 	 * We use the estimated task size (with a fudge factor) as the default
1504 	 * linked kcdata buffer size in an effort to reduce overhead (ideally, we want
1505 	 * each task to only need a single kcdata buffer.)
1506 	 */
1507 	uint32_t est_task_size = get_stackshot_est_tasksize(trace_flags);
1508 	uint32_t est_kcdata_size = FUDGED_SIZE(est_task_size, adj);
1509 	uint64_t est_preamble_size = calc_linked_kcdata_size(8192 * 4, est_kcdata_size);
1510 	uint64_t est_postamble_size = calc_linked_kcdata_size(8192 * 2, est_kcdata_size);
1511 	uint64_t est_extra_size = 0;
1512 
1513 	adj = MIN(adj, 100u);   /* no more than double our estimate */
1514 
1515 #if STACKSHOT_COLLECTS_LATENCY_INFO
1516 	est_extra_size += real_ncpus * sizeof(struct stackshot_latency_cpu);
1517 	est_extra_size += sizeof(struct stackshot_latency_collection_v2);
1518 #endif
1519 
1520 #if HAS_MTE
1521 	if (trace_flags & STACKSHOT_MTEINFO) {
1522 		est_extra_size += mte_tag_storage_count * sizeof(struct mte_info_cell);
1523 	}
1524 #endif
1525 	est_extra_size += real_ncpus * MAX_FRAMES * sizeof(uintptr_t); /* Stacktrace buffers */
1526 	est_extra_size += FUDGED_SIZE(tasks_count, 10) * sizeof(uintptr_t) * STACKSHOT_NUM_WORKQUEUES; /* Work queues */
1527 	est_extra_size += sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ * real_ncpus, STACKSHOT_PAGE_TABLES);
1528 
1529 	thread_and_task_total = calc_linked_kcdata_size(est_task_size, est_kcdata_size);
1530 	if (!process_scoped) {
1531 		thread_and_task_total *= tasks_count;
1532 	}
1533 	size = thread_and_task_total + est_preamble_size + est_postamble_size + est_extra_size; /* estimate */
1534 	size = FUDGED_SIZE(size, adj); /* add adj */
1535 	size = MAX(size, prev_size_hint); /* allow hint to increase */
1536 	size += stackshot_plh_est_size(); /* add space for the port label hash */
1537 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
1538 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
1539 
1540 	return estimated_size;
1541 }
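/*
 * Worked example of the overall estimate (illustrative numbers): with
 * est_task_size == 8192 bytes, adj == 10, and 300 tasks, each task is
 * budgeted calc_linked_kcdata_size(8192, FUDGED_SIZE(8192, 10)) bytes of
 * linked kcdata; the summed task, preamble, postamble, and scratch
 * components are then fudged by a further 10%, floored at prev_size_hint,
 * and rounded up to a whole page.
 */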
1542 
1543 /**
1544  * Copies a linked list of kcdata structures into a final kcdata structure.
1545  * Only used from stackshot_collect_kcdata.
1546  */
1547 __result_use_check
1548 static kern_return_t
1549 stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata, linked_kcdata_descriptor_t linked_kcdata)
1550 {
1551 	kern_return_t error = KERN_SUCCESS;
1552 
1553 	while (linked_kcdata) {
1554 		/* Walk linked kcdata list */
1555 		kcdata_descriptor_t cur_kcdata = &linked_kcdata->kcdata;
1556 		if ((cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin) == 0) {
1557 			linked_kcdata = linked_kcdata->next;
1558 			continue;
1559 		}
1560 
1561 		/* Every item in the linked kcdata should have a header tag of type KCDATA_BUFFER_BEGIN_STACKSHOT. */
1562 		assert(((struct kcdata_item*) cur_kcdata->kcd_addr_begin)->type == KCDATA_BUFFER_BEGIN_STACKSHOT);
1563 		assert((final_kcdata->kcd_addr_begin + final_kcdata->kcd_length) > final_kcdata->kcd_addr_end);
1564 		size_t header_size = sizeof(kcdata_item_t) + kcdata_calc_padding(sizeof(kcdata_item_t));
1565 		size_t size = cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin - header_size;
1566 		size_t free = (final_kcdata->kcd_length + final_kcdata->kcd_addr_begin) - final_kcdata->kcd_addr_end;
1567 		if (free < size) {
1568 			error = KERN_INSUFFICIENT_BUFFER_SIZE;
1569 			goto error_exit;
1570 		}
1571 
1572 		/* Just memcpy the data over (and compress if we need to.) */
1573 		kcdata_compression_window_open(final_kcdata);
1574 		error = kcdata_memcpy(final_kcdata, final_kcdata->kcd_addr_end, (void*) (cur_kcdata->kcd_addr_begin + header_size), size);
1575 		if (error != KERN_SUCCESS) {
1576 			goto error_exit;
1577 		}
1578 		final_kcdata->kcd_addr_end += size;
1579 		kcdata_compression_window_close(final_kcdata);
1580 
1581 		linked_kcdata = linked_kcdata->next;
1582 	}
1583 
1584 error_exit:
1585 	return error;
1586 }
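/*
 * Buffer accounting in the copy loop above: a kcdata descriptor covers
 * [kcd_addr_begin, kcd_addr_begin + kcd_length), and kcd_addr_end is the
 * high-water mark of written data, so
 *
 *	used == kcd_addr_end - kcd_addr_begin
 *	free == (kcd_addr_begin + kcd_length) - kcd_addr_end
 *
 * Each source frame is copied minus its own KCDATA_BUFFER_BEGIN_STACKSHOT
 * header, since the final buffer already carries a single header of its own.
 */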
1587 
1588 /**
1589  * Copies the duration, latency, and diagnostic info into a final kcdata buffer.
1590  * Only used by stackshot_finalize_kcdata and stackshot_finalize_singlethreaded_kcdata.
1591  */
1592 __result_use_check
1593 static kern_return_t
1594 stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)
1595 {
1596 	kern_return_t error;
1597 	mach_vm_address_t out_addr;
1598 	bool use_fault_path = ((stackshot_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
1599 #if STACKSHOT_COLLECTS_LATENCY_INFO
1600 	size_t buffer_used = 0;
1601 	size_t buffer_overhead = 0;
1602 	struct stackshot_latency_buffer buffer_latency;
1603 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1604 
1605 	if (use_fault_path) {
1606 		struct stackshot_fault_stats stats = (struct stackshot_fault_stats) {
1607 			.sfs_pages_faulted_in = 0,
1608 			.sfs_time_spent_faulting = 0,
1609 			.sfs_system_max_fault_time = stackshot_max_fault_time,
1610 			.sfs_stopped_faulting = false
1611 		};
1612 		percpu_foreach_base(base) {
1613 			struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
1614 			if (!cpu_ctx->scc_did_work) {
1615 				continue;
1616 			}
1617 			stats.sfs_pages_faulted_in += cpu_ctx->scc_fault_stats.sfs_pages_faulted_in;
1618 			stats.sfs_time_spent_faulting += cpu_ctx->scc_fault_stats.sfs_time_spent_faulting;
1619 			stats.sfs_stopped_faulting = stats.sfs_stopped_faulting || cpu_ctx->scc_fault_stats.sfs_stopped_faulting;
1620 		}
1621 		kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
1622 		    sizeof(struct stackshot_fault_stats), &stats);
1623 	}
1624 
1625 #if STACKSHOT_COLLECTS_LATENCY_INFO
1626 	int num_working_cpus = 0;
1627 	if (collect_latency_info) {
1628 		/* Add per-CPU latency info */
1629 		percpu_foreach(cpu_ctx, stackshot_cpu_ctx_percpu) {
1630 			if (cpu_ctx->scc_did_work) {
1631 				num_working_cpus++;
1632 			}
1633 		}
1634 		kcdata_compression_window_open(kcdata);
1635 		kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1636 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_CPU, sizeof(struct stackshot_latency_cpu), num_working_cpus, &out_addr));
1637 		percpu_foreach_base(base) {
1638 			if (PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu)->scc_did_work) {
1639 				kcdata_memcpy(kcdata, out_addr, PERCPU_GET_WITH_BASE(base, stackshot_cpu_latency_percpu),
1640 				    sizeof(struct stackshot_latency_cpu));
1641 				out_addr += sizeof(struct stackshot_latency_cpu);
1642 			}
1643 		}
1644 		kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1645 
1646 		kcdata_compression_window_open(kcdata);
1647 		kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1648 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_BUFFER, sizeof(struct stackshot_latency_buffer), stackshot_ctx.sc_num_buffers, &out_addr));
1649 
1650 		/* Add up buffer info */
1651 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++, out_addr += sizeof(buffer_latency)) {
1652 			struct stackshot_buffer *buf = &stackshot_ctx.sc_buffers[buf_idx];
1653 			if (buf->ssb_ptr == NULL) {
1654 				kcdata_bzero(kcdata, out_addr, sizeof(struct stackshot_latency_buffer));
1655 				continue;
1656 			}
1657 
1658 #if defined(__arm64__)
1659 			ml_topology_cluster_t *cluster = &ml_get_topology_info()->clusters[buf_idx];
1660 			buffer_latency.cluster_type = cluster->cluster_type;
1661 #else /* __arm64__ */
1662 			buffer_latency.cluster_type = CLUSTER_TYPE_SMP;
1663 #endif /* !__arm64__ */
1664 			buffer_latency.size = buf->ssb_size;
1665 			buffer_latency.used = os_atomic_load(&buf->ssb_used, relaxed);
1666 			buffer_latency.overhead = os_atomic_load(&buf->ssb_overhead, relaxed);
1667 			kcd_exit_on_error(kcdata_memcpy(
1668 				    kcdata, out_addr, &buffer_latency, sizeof(buffer_latency)));
1669 
1670 			buffer_used += buffer_latency.used;
1671 			buffer_overhead += buffer_latency.overhead;
1672 		}
1673 		kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1674 
1675 		stackshot_ctx.sc_latency.buffer_size = stackshot_ctx.sc_args.buffer_size;
1676 		stackshot_ctx.sc_latency.buffer_overhead = buffer_overhead;
1677 		stackshot_ctx.sc_latency.buffer_used = buffer_used;
1678 		stackshot_ctx.sc_latency.buffer_count = stackshot_ctx.sc_num_buffers;
1679 
1680 		/* Add overall latency info */
1681 		kcd_exit_on_error(kcdata_push_data(
1682 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO,
1683 			    sizeof(stackshot_ctx.sc_latency), &stackshot_ctx.sc_latency));
1684 	}
1685 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1686 
1687 	if ((stackshot_flags & STACKSHOT_DO_COMPRESS) == 0) {
1688 		assert(!stackshot_ctx.sc_panic_stackshot);
1689 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
1690 		    sizeof(struct stackshot_duration_v2), &out_addr));
1691 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
1692 		memcpy(duration_p, &stackshot_ctx.sc_duration, sizeof(*duration_p));
1693 		stackshot_duration_outer = (unaligned_u64 *) &duration_p->stackshot_duration_outer;
1694 		kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_tries, "stackshot_tries"));
1695 	} else {
1696 		kcd_exit_on_error(kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_ctx.sc_duration), &stackshot_ctx.sc_duration));
1697 		stackshot_duration_outer = NULL;
1698 	}
1699 
1700 error_exit:
1701 	return error;
1702 }
1703 
1704 /**
1705  * Allocates the final kcdata buffer for a multithreaded stackshot,
1706  * where all of the per-task kcdata (and exclave kcdata) will end up.
1707  */
1708 __result_use_check
1709 static kern_return_t
1710 stackshot_alloc_final_kcdata(void)
1711 {
1712 	vm_offset_t   final_kcdata_buffer = 0;
1713 	kern_return_t error = KERN_SUCCESS;
1714 	uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1715 	    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1716 	    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1717 
1718 	if (stackshot_ctx.sc_is_singlethreaded) {
1719 		return KERN_SUCCESS;
1720 	}
1721 
1722 	if ((error = kmem_alloc(kernel_map, &final_kcdata_buffer, stackshot_args.buffer_size,
1723 	    KMA_ZERO | KMA_DATA_SHARED, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
1724 		os_log_error(OS_LOG_DEFAULT, "stackshot: final allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, stackshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
1725 		return KERN_RESOURCE_SHORTAGE;
1726 	}
1727 
1728 	stackshot_ctx.sc_finalized_kcdata = kcdata_memory_alloc_init(final_kcdata_buffer, hdr_tag,
1729 	    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1730 
1731 	if (stackshot_ctx.sc_finalized_kcdata == NULL) {
1732 		kmem_free(kernel_map, final_kcdata_buffer, stackshot_args.buffer_size);
1733 		return KERN_FAILURE;
1734 	}
1735 
1736 	return KERN_SUCCESS;
1737 }
1738 
1739 /**
1740  * Frees the final kcdata buffer.
1741  */
1742 static void
1743 stackshot_free_final_kcdata(void)
1744 {
1745 	if (stackshot_ctx.sc_is_singlethreaded || (stackshot_ctx.sc_finalized_kcdata == NULL)) {
1746 		return;
1747 	}
1748 
1749 	kmem_free(kernel_map, stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1750 	kcdata_memory_destroy(stackshot_ctx.sc_finalized_kcdata);
1751 	stackshot_ctx.sc_finalized_kcdata = NULL;
1752 }
1753 
1754 /**
1755  * Called once we exit the debugger trap to collate all of the separate linked
1756  * kcdata lists into one kcdata buffer. The calling thread will run this, and
1757  * it is guaranteed that nobody else is touching any stackshot state at this
1758  * point. In the case of a panic stackshot, this is never called since we only
1759  * use one thread.
1760  *
1761  * Called with interrupts enabled, stackshot subsys lock held.
1762  */
1763 __result_use_check
1764 static kern_return_t
1765 stackshot_collect_kcdata(void)
1766 {
1767 	kern_return_t error = 0;
1768 	uint32_t      hdr_tag;
1769 
1770 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1771 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1772 
1773 	/* Allocate our final kcdata buffer. */
1774 	kcd_exit_on_error(stackshot_alloc_final_kcdata());
1775 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1776 
1777 	/* Setup compression if we need it. */
1778 	if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
1779 		hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1780 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1781 		kcd_exit_on_error(kcdata_init_compress(stackshot_ctx.sc_finalized_kcdata, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
1782 	}
1783 
1784 	/* Copy over all of the pre task-iteration kcdata (to preserve order as if it were single-threaded) */
1785 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_pretask_kcdata));
1786 
1787 	/* Set each queue's cur_item to 0. */
1788 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1789 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_cur_item, 0, relaxed);
1790 	}
1791 
1792 	/*
1793 	 * Iterate over work queue(s) and copy the kcdata in.
1794 	 */
1795 	while (true) {
1796 		struct stackshot_workitem  *next_item = NULL;
1797 		struct stackshot_workqueue *next_queue = NULL;
1798 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1799 			struct stackshot_workqueue *queue = &stackshot_ctx.sc_workqueues[i];
1800 			size_t cur_item = os_atomic_load(&queue->sswq_cur_item, relaxed);
1801 
1802 			/* Check if we're done with this queue */
1803 			if (cur_item >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1804 				continue;
1805 			}
1806 
1807 			/* Check if this workitem should come next */
1808 			struct stackshot_workitem *item = &queue->sswq_items[cur_item];
1809 			if ((next_item == NULL) || (next_item->sswi_idx > item->sswi_idx)) {
1810 				next_item = item;
1811 				next_queue = queue;
1812 			}
1813 		}
1814 
1815 		/* Queues are empty. */
1816 		if (next_item == NULL) {
1817 			break;
1818 		}
1819 
1820 		assert(next_queue);
1821 		assert(next_item->sswi_data != NULL);
1822 
1823 		os_atomic_inc(&next_queue->sswq_cur_item, relaxed);
1824 		kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, next_item->sswi_data));
1825 	}
1826 
1827 	/* Write post-task kcdata */
1828 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_posttask_kcdata));
1829 error_exit:
1830 	if (error != KERN_SUCCESS) {
1831 		stackshot_free_final_kcdata();
1832 	}
1833 	return error;
1834 }
1835 
1836 
1837 /**
1838  * Called at the very end of stackshot data generation, to write final timing
1839  * data to the kcdata structure and close compression. Only called for
1840  * multi-threaded stackshots; see stackshot_finalize_singlethreaded_kcdata for
1841  * single-threaded variant.
1842  *
1843  * Called with interrupts enabled, stackshot subsys lock held.
1844  */
1845 __result_use_check
1846 static kern_return_t
1847 stackshot_finalize_kcdata(void)
1848 {
1849 	kern_return_t error = 0;
1850 
1851 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1852 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1853 
1854 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1855 
1856 	/* Write stackshot timing info */
1857 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1858 
1859 	/* Note: at most one call that pushes more data may follow kcd_finalize_compression */
1860 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1861 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1862 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1863 
1864 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1865 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1866 
1867 	if (os_atomic_load(&stackshot_ctx.sc_retval, relaxed) == KERN_SUCCESS) {
1868 		/* releases and zeros done */
1869 		kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1870 	}
1871 
1872 	memcpy(stackshot_args.buffer, (void*) stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1873 
1874 	/* Fix duration_outer offset */
1875 	if (stackshot_duration_outer != NULL) {
1876 		stackshot_duration_outer = (unaligned_u64*) ((mach_vm_address_t) stackshot_args.buffer + ((mach_vm_address_t) stackshot_duration_outer - stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin));
1877 	}
1878 
1879 error_exit:
1880 	stackshot_free_final_kcdata();
1881 	return error;
1882 }
1883 
1884 /**
1885  * Finalizes the kcdata for a singlethreaded stackshot.
1886  *
1887  * May be called from interrupt/panic context.
1888  */
1889 __result_use_check
1890 static kern_return_t
1891 stackshot_finalize_singlethreaded_kcdata(void)
1892 {
1893 	kern_return_t error;
1894 
1895 	assert(stackshot_ctx.sc_is_singlethreaded);
1896 
1897 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1898 	/* Note: at most one call that pushes more data may follow kcd_finalize_compression */
1899 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1900 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1901 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1902 
1903 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1904 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1905 
1906 	kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1907 
1908 	if (stackshot_ctx.sc_panic_stackshot) {
1909 		*stackshot_args.descriptor = *stackshot_ctx.sc_finalized_kcdata;
1910 	}
1911 
1912 error_exit:
1913 	return error;
1914 }
1915 
1916 /*
1917  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
1918  *				into the current task's user space and subsequently copy out the address
1919  *				at which the buffer has been mapped in user space to out_buffer_addr.
1920  *
1921  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
1922  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
1923  *				out_buffer_addr - user address to which the address of the newly mapped buffer is copied out
1924  *				out_size_addr - user address to which the size of the buffer is copied out
1925  *
1926  * Outputs:			ENOSPC if there is not enough free space in the task's address space to remap the buffer
1927  *				EINVAL for all other errors returned by mach_vm_remap
1928  *				an error from copyout
1929  */
1930 static kern_return_t
1931 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
1932 {
1933 	int                     error = 0;
1934 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
1935 	vm_prot_t               cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
1936 
1937 	error = mach_vm_remap(current_map(), &stackshotbuf_user_addr, bytes_traced, 0,
1938 	    VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE,
1939 	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
1940 	/*
1941 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
1942 	 */
1943 	if (error == KERN_SUCCESS) {
1944 		/* If the user addr somehow didn't get set, we should make sure that we fail, and (eventually)
1945 		 * panic on development kernels to find out why
1946 		 */
1947 		if (stackshotbuf_user_addr == (mach_vm_offset_t)NULL) {
1948 #if DEVELOPMENT || DEBUG
1949 			os_log_error(OS_LOG_DEFAULT, "stackshot: mach_vm_remap succeeded with NULL\n");
1950 #endif // DEVELOPMENT || DEBUG
1951 			return KERN_FAILURE;
1952 		}
1953 
1954 		/*
1955 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
1956 		 * we just made in the task's user space.
1957 		 */
1958 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
1959 		if (error != KERN_SUCCESS) {
1960 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1961 			return error;
1962 		}
1963 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
1964 		if (error != KERN_SUCCESS) {
1965 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1966 			return error;
1967 		}
1968 	}
1969 	return error;
1970 }
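/*
 * User-side view of the remap above (hypothetical sketch; the real interface
 * is the stackshot_config_t fields consumed by kern_stack_snapshot_internal):
 * the caller passes the addresses of two locals and, on KERN_SUCCESS, reads
 * back where the snapshot landed in its address space.
 *
 *	void     *snap_buf  = NULL;
 *	uint32_t  snap_size = 0;
 *	config.sc_out_buffer_addr = (uint64_t)&snap_buf;
 *	config.sc_out_size_addr   = (uint64_t)&snap_size;
 *	// ... take the stackshot; on success snap_buf/snap_size are valid and
 *	// the mapping should eventually be freed with mach_vm_deallocate().
 */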
1971 
1972 #if CONFIG_EXCLAVES
1973 
1974 /*
1975  * Allocates an array for exclaves inspection from the stackshot buffer. This
1976  * state must be cleaned up by calling `stackshot_cleanup_exclave_waitlist`
1977  * after the stackshot is finished.
1978  */
1979 static kern_return_t
1980 stackshot_setup_exclave_waitlist(void)
1981 {
1982 	kern_return_t error = KERN_SUCCESS;
1983 	size_t exclave_threads_max = exclaves_ipc_buffer_count();
1984 	size_t waitlist_size = 0;
1985 
1986 	assert(!stackshot_exclave_inspect_ctids);
1987 
1988 	if (exclaves_inspection_is_initialized() && exclave_threads_max) {
1989 		if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
1990 			error = KERN_INVALID_ARGUMENT;
1991 			goto error;
1992 		}
1993 		stackshot_exclave_inspect_ctids = stackshot_alloc_with_size(waitlist_size, &error);
1994 		if (!stackshot_exclave_inspect_ctids) {
1995 			goto error;
1996 		}
1997 		stackshot_exclave_inspect_ctid_count = 0;
1998 		stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
1999 	}
2000 
2001 error:
2002 	return error;
2003 }
2004 
2005 static void
2006 stackshot_cleanup_exclave_waitlist(void)
2007 {
2008 	stackshot_exclave_inspect_ctids = NULL;
2009 	stackshot_exclave_inspect_ctid_capacity = 0;
2010 	stackshot_exclave_inspect_ctid_count = 0;
2011 }
2012 
2013 static kern_return_t
2014 collect_exclave_threads(uint64_t ss_flags)
2015 {
2016 	size_t i;
2017 	ctid_t ctid;
2018 	thread_t thread;
2019 	kern_return_t kr = KERN_SUCCESS;
2020 	STACKSHOT_SUBSYS_ASSERT_LOCKED();
2021 
2022 	lck_mtx_lock(&exclaves_collect_mtx);
2023 
2024 	if (stackshot_exclave_inspect_ctid_count == 0) {
2025 		/* Nothing to do */
2026 		goto out;
2027 	}
2028 
2029 	// When asking for ASIDs, make sure we get all exclave ASIDs and mappings as well
2030 	exclaves_stackshot_raw_addresses = (ss_flags & STACKSHOT_ASID);
2031 	exclaves_stackshot_all_address_spaces = (ss_flags & (STACKSHOT_ASID | STACKSHOT_EXCLAVES));
2032 
2033 	/* This error is intentionally ignored: we are now committed to collecting
2034 	 * these threads, or at least properly waking them. If this fails, the first
2035 	 * collected thread should also fail to append to the kcdata, and will abort
2036 	 * further collection, properly clearing the AST and waking these threads.
2037 	 */
2038 	kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_BEGIN,
2039 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
2040 
2041 	for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
2042 		ctid = stackshot_exclave_inspect_ctids[i];
2043 		thread = ctid_get_thread(ctid);
2044 		assert(thread);
2045 		exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
2046 	}
2047 	exclaves_inspection_begin_collecting();
2048 	exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
2049 	kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
2050 	if (kr != KERN_SUCCESS) {
2051 		goto out;
2052 	}
2053 
2054 	kr = kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_END,
2055 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
2056 	if (kr != KERN_SUCCESS) {
2057 		goto out;
2058 	}
2059 out:
2060 	lck_mtx_unlock(&exclaves_collect_mtx);
2061 	return kr;
2062 }
2063 
2064 static kern_return_t
2065 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
2066 {
2067 	kern_return_t error = KERN_SUCCESS;
2068 	exclave_ecstackentry_addr_t * addr = NULL;
2069 	__block size_t count = 0;
2070 
2071 	if (!st->has_value) {
2072 		goto error_exit;
2073 	}
2074 
2075 	address__v_visit(&st->value, ^(size_t __unused i, const stackshottypes_address_s __unused item) {
2076 		count++;
2077 	});
2078 
2079 	kcdata_compression_window_open(kcdata_ptr);
2080 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
2081 	    sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
2082 
2083 	address__v_visit(&st->value, ^(size_t i, const stackshottypes_address_s item) {
2084 		addr[i] = (exclave_ecstackentry_addr_t)item;
2085 	});
2086 
2087 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2088 
2089 error_exit:
2090 	return error;
2091 }
2092 
2093 static kern_return_t
2094 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshottypes_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
2095 {
2096 	kern_return_t error = KERN_SUCCESS;
2097 
2098 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2099 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2100 
2101 	struct exclave_ipcstackentry_info info = { 0 };
2102 	info.eise_asid = ise->asid;
2103 
2104 	info.eise_tnid = ise->tnid;
2105 
2106 	if (ise->invocationid.has_value) {
2107 		info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
2108 		info.eise_invocationid = ise->invocationid.value;
2109 	} else {
2110 		info.eise_invocationid = 0;
2111 	}
2112 
2113 	info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
2114 
2115 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
2116 
2117 	if (ise->stacktrace.has_value) {
2118 		kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
2119 	}
2120 
2121 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2122 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2123 
2124 error_exit:
2125 	return error;
2126 }
2127 
2128 static kern_return_t
2129 stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
2130 {
2131 	__block kern_return_t kr = KERN_SUCCESS;
2132 
2133 	if (!ipcstack->has_value) {
2134 		goto error_exit;
2135 	}
2136 
2137 	stackshottypes_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshottypes_ipcstackentry_s *_Nonnull item) {
2138 		if (kr == KERN_SUCCESS) {
2139 		        kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
2140 		}
2141 	});
2142 
2143 error_exit:
2144 	return kr;
2145 }
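/*
 * The __block kr above is the usual way to surface an error from a tightbeam
 * visitor, since the visit block itself returns void: iterations after the
 * first failure become no-ops and the first error wins. A generic sketch:
 *
 *	__block kern_return_t kr = KERN_SUCCESS;
 *	some_type__v_visit(&vec, ^(size_t i, const some_type_s *item) {
 *	        if (kr == KERN_SUCCESS) {
 *	                kr = process_one(i, item);	// hypothetical helper
 *	        }
 *	});
 *	return kr;
 */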
2146 
2147 static kern_return_t
2148 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
2149 {
2150 	kern_return_t error = KERN_SUCCESS;
2151 
2152 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2153 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2154 
2155 	struct exclave_scresult_info info = { 0 };
2156 	info.esc_id = se->scid;
2157 	info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
2158 
2159 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
2160 
2161 	if (se->ipcstack.has_value) {
2162 		kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
2163 	}
2164 
2165 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2166 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2167 
2168 error_exit:
2169 	return error;
2170 }
2171 
2172 static kern_return_t
2173 stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2174 {
2175 	kern_return_t error = KERN_SUCCESS;
2176 	__block struct exclave_textlayout_segment_v2 * info = NULL;
2177 
2178 	__block size_t count = 0;
2179 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s __unused *_Nonnull item) {
2180 		count++;
2181 	});
2182 
2183 	if (!count) {
2184 		goto error_exit;
2185 	}
2186 
2187 	kcdata_compression_window_open(kcdata_ptr);
2188 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
2189 	    sizeof(struct exclave_textlayout_segment_v2), count, (mach_vm_address_t*)&info));
2190 
2191 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s *_Nonnull item) {
2192 		memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
2193 		info->layoutSegment_loadAddress = item->loadaddress;
2194 		if (want_raw_addresses) {
2195 		        info->layoutSegment_rawLoadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
2196 		} else {
2197 		        info->layoutSegment_rawLoadAddress = 0;
2198 		}
2199 		info++;
2200 	});
2201 
2202 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2203 
2204 error_exit:
2205 	return error;
2206 }
2207 
2208 static kern_return_t
2209 stackshot_exclaves_process_textlayout(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2210 {
2211 	kern_return_t error = KERN_SUCCESS;
2212 	__block struct exclave_textlayout_info info = { 0 };
2213 
2214 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2215 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2216 
2217 	// tightbeam optional interfaces don't have enough const.
2218 	u32__opt_s sharedcacheindex_opt = tl->sharedcacheindex;
2219 	const uint32_t *sharedcache_index = u32__opt_get(&sharedcacheindex_opt);
2220 
2221 	info.layout_id = tl->textlayoutid;
2222 
2223 	info.etl_flags =
2224 	    (want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid) |
2225 	    (sharedcache_index == NULL ? 0 : kExclaveTextLayoutHasSharedCache);
2226 	info.sharedcache_index = (sharedcache_index == NULL) ? UINT32_MAX : *sharedcache_index;
2227 
2228 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
2229 	kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
2230 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2231 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2232 error_exit:
2233 	return error;
2234 }
2235 
2236 static kern_return_t
2237 stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
2238 {
2239 	kern_return_t error = KERN_SUCCESS;
2240 	struct exclave_addressspace_info info = { 0 };
2241 	__block size_t name_len = 0;
2242 	uint8_t * name = NULL;
2243 
2244 	u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
2245 		name_len++;
2246 	});
2247 
2248 	info.eas_id = as->asid;
2249 
2250 	if (want_raw_addresses && as->rawaddressslide.has_value) {
2251 		info.eas_flags = kExclaveAddressSpaceHaveSlide;
2252 		info.eas_slide = as->rawaddressslide.value;
2253 	} else {
2254 		info.eas_flags = 0;
2255 		info.eas_slide = UINT64_MAX;
2256 	}
2257 
2258 	info.eas_layoutid = as->textlayoutid; // text layout for this address space
2259 	info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
2260 
2261 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2262 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2263 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
2264 
2265 	if (name_len > 0) {
2266 		kcdata_compression_window_open(kcdata_ptr);
2267 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
2268 
2269 		u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
2270 			name[i] = item;
2271 		});
2272 		name[name_len] = 0;
2273 
2274 		kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2275 	}
2276 
2277 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2278 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2279 error_exit:
2280 	return error;
2281 }
2282 
2283 kern_return_t
2284 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
2285 
2286 kern_return_t
2287 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
2288 {
2289 	__block kern_return_t kr = KERN_SUCCESS;
2290 
2291 	stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
2292 		if (kr == KERN_SUCCESS) {
2293 		        kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
2294 		}
2295 	});
2296 
2297 	stackshottypes_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshottypes_addressspace_s *_Nonnull item) {
2298 		if (kr == KERN_SUCCESS) {
2299 		        kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
2300 		}
2301 	});
2302 
2303 	stackshottypes_textlayout__v_visit(&result->textlayouts, ^(size_t __unused i, const stackshottypes_textlayout_s *_Nonnull item) {
2304 		if (kr == KERN_SUCCESS) {
2305 		        kr = stackshot_exclaves_process_textlayout(item, kcdata_ptr, want_raw_addresses);
2306 		}
2307 	});
2308 
2309 	return kr;
2310 }
2311 
2312 kern_return_t
2313 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
2314 
2315 kern_return_t
2316 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
2317 {
2318 	kern_return_t kr = KERN_SUCCESS;
2319 	if (result == NULL) {
2320 		return collect_kr;
2321 	}
2322 
2323 	kr = stackshot_exclaves_process_stackshot(result, stackshot_ctx.sc_finalized_kcdata, want_raw_addresses);
2324 
2325 	stackshot_exclave_kr = kr;
2326 
2327 	return kr;
2328 }
2329 
2330 
2331 static void
2332 commit_exclaves_ast(void)
2333 {
2334 	size_t i = 0;
2335 	thread_t thread = NULL;
2336 	size_t count;
2337 
2338 	assert(debug_mode_active());
2339 
2340 	count = os_atomic_load(&stackshot_exclave_inspect_ctid_count, acquire);
2341 
2342 	if (stackshot_exclave_inspect_ctids) {
2343 		for (i = 0; i < count; ++i) {
2344 			thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
2345 			assert(thread);
2346 			thread_reference(thread);
2347 			os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
2348 		}
2349 	}
2350 }
2351 
2352 #endif /* CONFIG_EXCLAVES */
2353 
2354 kern_return_t
2355 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
2356 {
2357 	int error = 0;
2358 	boolean_t prev_interrupt_state;
2359 	bool did_copyout = false;
2360 	uint32_t bytes_traced = 0;
2361 	uint32_t stackshot_estimate = 0;
2362 	struct kdp_snapshot_args snapshot_args;
2363 
2364 	void * buf_to_free = NULL;
2365 	int size_to_free = 0;
2366 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
2367 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
2368 
2369 	/* Parsed arguments */
2370 	uint64_t                out_buffer_addr;
2371 	uint64_t                out_size_addr;
2372 	uint32_t                size_hint = 0;
2373 
2374 	snapshot_args.pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
2375 
2376 	if (stackshot_config == NULL) {
2377 		return KERN_INVALID_ARGUMENT;
2378 	}
2379 #if DEVELOPMENT || DEBUG
2380 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
2381 	 * configuration in lieu of the kernel feature override.
2382 	 */
2383 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
2384 		return KERN_NOT_SUPPORTED;
2385 	}
2386 #endif
2387 
2388 	switch (stackshot_config_version) {
2389 	case STACKSHOT_CONFIG_TYPE:
2390 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
2391 			return KERN_INVALID_ARGUMENT;
2392 		}
2393 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
2394 		out_buffer_addr = config->sc_out_buffer_addr;
2395 		out_size_addr = config->sc_out_size_addr;
2396 		snapshot_args.pid = config->sc_pid;
2397 		snapshot_args.flags = config->sc_flags;
2398 		snapshot_args.since_timestamp = config->sc_delta_timestamp;
2399 		if (config->sc_size <= max_tracebuf_size) {
2400 			size_hint = config->sc_size;
2401 		}
2402 		/*
2403 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES:
2404 		 * dump every level if the pagetable_mask is not set.
2405 		 */
2406 		if (snapshot_args.flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
2407 			snapshot_args.pagetable_mask = config->sc_pagetable_mask;
2408 		}
2409 		break;
2410 	default:
2411 		return KERN_NOT_SUPPORTED;
2412 	}
2413 
2414 	/*
2415 	 * Currently saving a kernel buffer and trylock are only supported from the
2416 	 * internal/KEXT API.
2417 	 */
2418 	if (stackshot_from_user) {
2419 		if (snapshot_args.flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
2420 			return KERN_NO_ACCESS;
2421 		}
2422 #if !DEVELOPMENT && !DEBUG
2423 		if (snapshot_args.flags & (STACKSHOT_DO_COMPRESS)) {
2424 			return KERN_NO_ACCESS;
2425 		}
2426 #endif
2427 	} else {
2428 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2429 			return KERN_NOT_SUPPORTED;
2430 		}
2431 	}
2432 
2433 	if (!((snapshot_args.flags & STACKSHOT_KCDATA_FORMAT) || (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
2434 		return KERN_NOT_SUPPORTED;
2435 	}
2436 
2437 	/* Compressed delta stackshots or page dumps are not yet supported */
2438 	if (((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (snapshot_args.flags & STACKSHOT_PAGE_TABLES))
2439 	    && (snapshot_args.flags & STACKSHOT_DO_COMPRESS)) {
2440 		return KERN_NOT_SUPPORTED;
2441 	}
2442 
2443 	/*
2444 	 * If we're not saving the stackshot in a kernel buffer, we need somewhere to copy it out to.
2445 	 */
2446 	if ((!out_buffer_addr || !out_size_addr) && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2447 		return KERN_INVALID_ARGUMENT;
2448 	}
2449 
2450 	if (snapshot_args.since_timestamp != 0 && ((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
2451 		return KERN_INVALID_ARGUMENT;
2452 	}
2453 
2454 	/* EXCLAVES and SKIP_EXCLAVES conflict */
2455 	if ((snapshot_args.flags & (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) == (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) {
2456 		return KERN_INVALID_ARGUMENT;
2457 	}
2458 
2459 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
2460 	if (!mt_core_supported) {
2461 		snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2462 	}
2463 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
2464 	snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2465 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
2466 
2467 	STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
2468 	STACKSHOT_SUBSYS_LOCK();
2469 
2470 	stackshot_tries = 0;
2471 
2472 	if (snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
2473 		/*
2474 		 * Don't overwrite an existing stackshot
2475 		 */
2476 		if (kernel_stackshot_buf != NULL) {
2477 			error = KERN_MEMORY_PRESENT;
2478 			goto error_early_exit;
2479 		}
2480 	} else if (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
2481 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
2482 			error = KERN_NOT_IN_SET;
2483 			goto error_early_exit;
2484 		}
2485 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
2486 		    out_buffer_addr, out_size_addr);
2487 		/*
2488 		 * If we successfully remapped the buffer into the user's address space, we
2489 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
2490 		 * and then clear the kernel stackshot pointer and associated size.
2491 		 */
2492 		if (error == KERN_SUCCESS) {
2493 			did_copyout = true;
2494 			buf_to_free = kernel_stackshot_buf;
2495 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
2496 			kernel_stackshot_buf = NULL;
2497 			kernel_stackshot_buf_size = 0;
2498 		}
2499 
2500 		goto error_early_exit;
2501 	}
2502 
2503 	if (snapshot_args.flags & STACKSHOT_GET_BOOT_PROFILE) {
2504 		void *bootprofile = NULL;
2505 		uint32_t len = 0;
2506 #if CONFIG_TELEMETRY
2507 		bootprofile_get(&bootprofile, &len);
2508 #endif
2509 		if (!bootprofile || !len) {
2510 			error = KERN_NOT_IN_SET;
2511 			goto error_early_exit;
2512 		}
2513 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
2514 		if (error == KERN_SUCCESS) {
2515 			did_copyout = true;
2516 		}
2517 		goto error_early_exit;
2518 	}
2519 
2520 	stackshot_duration_prior_abs = 0;
2521 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
2522 	snapshot_args.buffer_size = stackshot_estimate =
2523 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj, snapshot_args.flags, snapshot_args.pid);
2524 	stackshot_initial_estimate = stackshot_estimate;
2525 
2526 	// ensure at least one attempt, even if the initial estimated size was too big
2527 	snapshot_args.buffer_size = MIN(snapshot_args.buffer_size, max_tracebuf_size);
2528 
2529 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
2530 	    snapshot_args.flags, snapshot_args.buffer_size, snapshot_args.pid, snapshot_args.since_timestamp);
2531 	is_traced = true;
2532 
2533 #if CONFIG_EXCLAVES
2534 	assert(!stackshot_exclave_inspect_ctids);
2535 #endif
2536 
2537 	for (; snapshot_args.buffer_size <= max_tracebuf_size; snapshot_args.buffer_size = MIN(snapshot_args.buffer_size << 1, max_tracebuf_size)) {
2538 		stackshot_tries++;
2539 		if ((error = kmem_alloc(kernel_map, (vm_offset_t *)&snapshot_args.buffer, snapshot_args.buffer_size,
2540 		    KMA_ZERO | KMA_DATA_SHARED, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
2541 			os_log_error(OS_LOG_DEFAULT, "stackshot: initial allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, snapshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
2542 			error = KERN_RESOURCE_SHORTAGE;
2543 			goto error_exit;
2544 		}
2545 
2546 		uint32_t hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2547 		    : (snapshot_args.flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2548 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2549 		#pragma unused(hdr_tag)
2550 
2551 		stackshot_duration_outer = NULL;
2552 
2553 		/* if compression was requested, allocate the extra zlib scratch area */
2554 		if (snapshot_args.flags & STACKSHOT_DO_COMPRESS) {
2555 			hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2556 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2557 			if (error != KERN_SUCCESS) {
2558 				os_log_error(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
2559 				    (int) error);
2560 				goto error_exit;
2561 			}
2562 		}
2563 
2564 		/* Prepare the compressor for a stackshot */
2565 		error = vm_compressor_kdp_init();
2566 		if (error != KERN_SUCCESS) {
2567 			goto error_exit;
2568 		}
2569 
2570 		/*
2571 		 * Disable interrupts and save the current interrupt state.
2572 		 */
2573 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
2574 		uint64_t time_start  = mach_absolute_time();
2575 
2576 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
2577 		SOCD_TRACE_XNU_START(STACKSHOT);
2578 
2579 		/*
2580 		 * Load stackshot parameters.
2581 		 */
2582 		error = kdp_snapshot_preflight_internal(snapshot_args);
2583 
2584 		if (error == KERN_SUCCESS) {
2585 			error = stackshot_trap();
2586 		}
2587 
2588 		/* Emit a SOCD tracepoint that we have completed the stackshot */
2589 		SOCD_TRACE_XNU_END(STACKSHOT);
2590 		ml_set_interrupts_enabled(prev_interrupt_state);
2591 
2592 #if CONFIG_EXCLAVES
2593 		/* stackshot trap should only finish successfully or with no pending Exclave threads */
2594 		assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
2595 #endif
2596 
2597 		/*
2598 		 * Stackshot is no longer active.
2599 		 * (We have to do this here for the special interrupt disable timeout case to work)
2600 		 */
2601 		os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
2602 
2603 		/* Release compressor kdp buffers */
2604 		vm_compressor_kdp_teardown();
2605 
2606 		/* Record duration that interrupts were disabled */
2607 		uint64_t time_end = mach_absolute_time();
2608 		tot_interrupts_off_abs += (time_end - time_start);
2609 
2610 		/* Collect multithreaded kcdata into one finalized buffer */
2611 		if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
2612 			error = stackshot_collect_kcdata();
2613 		}
2614 
2615 #if CONFIG_EXCLAVES
2616 		if (stackshot_exclave_inspect_ctids) {
2617 			if (error == KERN_SUCCESS) {
2618 				if (stackshot_exclave_inspect_ctid_count > 0) {
2619 					STACKSHOT_TESTPOINT(TP_START_COLLECTION);
2620 				}
2621 				error = collect_exclave_threads(snapshot_args.flags);
2622 			}
2623 			stackshot_cleanup_exclave_waitlist();
2624 		}
2625 #endif /* CONFIG_EXCLAVES */
2626 
2627 		if (error == KERN_SUCCESS) {
2628 			if (stackshot_ctx.sc_is_singlethreaded) {
2629 				error = stackshot_finalize_singlethreaded_kcdata();
2630 			} else {
2631 				error = stackshot_finalize_kcdata();
2632 			}
2633 
2634 			if ((error != KERN_SUCCESS) && (error != KERN_INSUFFICIENT_BUFFER_SIZE)) {
2635 				goto error_exit;
2636 			}
2637 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE && snapshot_args.buffer_size == max_tracebuf_size) {
2638 				os_log_error(OS_LOG_DEFAULT, "stackshot: final buffer size was insufficient at maximum size: "
2639 				    "try %llu, estimate %u, flags %llu, pid %d, "
2640 				    "tasks: %d, terminated_tasks %d, threads: %d, terminated_threads: %d\n",
2641 				    stackshot_tries, snapshot_args.buffer_size, snapshot_args.flags, snapshot_args.pid,
2642 				    tasks_count, terminated_tasks_count,
2643 				    threads_count, terminated_threads_count);
2644 				error = KERN_RESOURCE_SHORTAGE;
2645 				goto error_exit;
2646 			}
2647 		}
2648 
2649 		/* record the duration that interrupts were disabled + kcdata was being finalized */
2650 		if (stackshot_duration_outer) {
2651 			*stackshot_duration_outer = mach_absolute_time() - time_start;
2652 		}
2653 
2654 		if (error != KERN_SUCCESS) {
2655 			os_log_error(OS_LOG_DEFAULT, "stackshot: debugger call failed: %d, try %llu, buffer %u estimate %u\n", (int)error, stackshot_tries, snapshot_args.buffer_size, stackshot_estimate);
2656 			kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2657 			snapshot_args.buffer = NULL;
2658 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
2659 				/*
2660 				 * If we didn't allocate a big enough buffer, deallocate and try again.
2661 				 */
2662 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
2663 				    time_end - time_start, stackshot_estimate, snapshot_args.buffer_size);
2664 				stackshot_duration_prior_abs += (time_end - time_start);
2665 				if (snapshot_args.buffer_size == max_tracebuf_size) {
2666 					os_log_error(OS_LOG_DEFAULT, "stackshot: initial buffer size was insufficient at maximum size: "
2667 					    "try %llu, estimate %u, flags %llu, pid %d, "
2668 					    "tasks: %d, terminated_tasks %d, threads: %d, terminated_threads: %d\n",
2669 					    stackshot_tries, snapshot_args.buffer_size, snapshot_args.flags, snapshot_args.pid,
2670 					    tasks_count, terminated_tasks_count,
2671 					    threads_count, terminated_threads_count);
2672 					error = KERN_RESOURCE_SHORTAGE;
2673 					goto error_exit;
2674 				}
2675 				continue;
2676 			} else {
2677 				goto error_exit;
2678 			}
2679 		}
2680 
2681 		bytes_traced = kdp_stack_snapshot_bytes_traced();
2682 		if (bytes_traced <= 0) {
2683 			error = KERN_ABORTED;
2684 			goto error_exit;
2685 		}
2686 
2687 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2688 			error = stackshot_remap_buffer(snapshot_args.buffer, bytes_traced, out_buffer_addr, out_size_addr);
2689 			if (error == KERN_SUCCESS) {
2690 				did_copyout = true;
2691 			}
2692 			goto error_exit;
2693 		}
2694 
2695 		if (!(snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT)) {
2696 			os_log_info(OS_LOG_DEFAULT, "stackshot: succeeded, traced %u bytes to %u buffer (estimate %u) try %llu\n", bytes_traced, snapshot_args.buffer_size, stackshot_estimate, stackshot_tries);
2697 		}
2698 
2699 		/*
2700 		 * Save the stackshot in the kernel buffer.
2701 		 */
2702 		kernel_stackshot_buf = snapshot_args.buffer;
2703 		kernel_stackshot_buf_size =  bytes_traced;
2704 		/*
2705 	 * Figure out whether we used all the pages in the buffer. If not, set buf_to_free to the beginning of
2706 	 * the first unused page after the end of the stackshot so that kmem_free clips the buffer, and update
2707 	 * size_to_free for kmem_free accordingly.
2708 		 */
2709 		size_to_free = snapshot_args.buffer_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
2710 
2711 		assert(size_to_free >= 0);
2712 
2713 		if (size_to_free != 0) {
2714 			buf_to_free = (void *)((uint64_t)snapshot_args.buffer + snapshot_args.buffer_size - size_to_free);
2715 		}
2716 
2717 		snapshot_args.buffer = NULL;
2718 		snapshot_args.buffer_size = 0;
2719 		goto error_exit;
2720 	}
2721 
2722 error_exit:
2723 	if (is_traced) {
2724 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
2725 		    error, tot_interrupts_off_abs, snapshot_args.buffer_size, bytes_traced);
2726 	}
2727 
2728 error_early_exit:
2729 	if (snapshot_args.buffer != NULL) {
2730 		kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2731 	}
2732 	if (buf_to_free != NULL) {
2733 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
2734 	}
2735 
2736 	if (error == KERN_SUCCESS && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) && !did_copyout) {
2737 		/* If we return success, we must have done the copyout to userspace. If
2738 		 * we somehow did not, we need to indicate failure instead.
2739 		 */
2740 #if DEVELOPMENT || DEBUG
2741 		os_log_error(OS_LOG_DEFAULT, "stackshot: reached end without doing copyout\n");
2742 #endif // DEVELOPMENT || DEBUG
2743 		error = KERN_FAILURE;
2744 	}
2745 
2746 	STACKSHOT_SUBSYS_UNLOCK();
2747 	STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
2748 
2749 	return error;
2750 }
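/*
 * Retry-sizing sketch for the allocation loop above (illustrative numbers):
 * if the initial estimate is 2 MiB and the trap keeps reporting
 * KERN_INSUFFICIENT_BUFFER_SIZE, successive attempts allocate 2, 4, 8, ...
 * MiB, each doubling clamped to max_tracebuf_size. Once an attempt already
 * at max_tracebuf_size still comes up short, the stackshot gives up with
 * KERN_RESOURCE_SHORTAGE instead of looping forever.
 */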
2751 
2752 /*
2753  * Set up state and parameters for a stackshot.
2754  * (This runs on the calling CPU before other CPUs enter the debugger trap.)
2755  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2756  */
2757 __result_use_check
2758 static kern_return_t
2759 kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)
2760 {
2761 	kern_return_t error = KERN_SUCCESS;
2762 	uint64_t microsecs = 0, secs = 0;
2763 	bool is_panic = ((args.flags & STACKSHOT_FROM_PANIC) != 0);
2764 	bool process_scoped = (args.pid != -1) &&
2765 	    ((args.flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
2766 	bool is_singlethreaded = stackshot_single_thread || (process_scoped || is_panic || ((args.flags & STACKSHOT_PAGE_TABLES) != 0));
2767 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
2768 
2769 	cur_stackshot_ctx_idx = (is_panic ? STACKSHOT_CTX_IDX_PANIC : STACKSHOT_CTX_IDX_NORMAL);
2770 
2771 	/* Setup overall state */
2772 	stackshot_ctx = (struct stackshot_context) {
2773 		.sc_args               = args,
2774 		.sc_state              = SS_SETUP,
2775 		.sc_bytes_traced       = 0,
2776 		.sc_bytes_uncompressed = 0,
2777 		.sc_microsecs          = microsecs + (secs * USEC_PER_SEC),
2778 		.sc_panic_stackshot    = is_panic,
2779 		.sc_is_singlethreaded  = is_singlethreaded,
2780 		.sc_cpus_working       = 0,
2781 		.sc_retval             = 0,
2782 		.sc_calling_cpuid      = cpu_number(),
2783 		.sc_main_cpuid         = is_singlethreaded ? cpu_number() : -1,
2784 		.sc_min_kcdata_size    = get_stackshot_est_tasksize(args.flags),
2785 		.sc_enable_faulting    = false,
2786 	};
2787 
2788 	if (!stackshot_ctx.sc_panic_stackshot) {
2789 #if defined(__AMP__)
2790 		/* On AMP systems, we want to split the buffers up by cluster to avoid cache line effects. */
2791 		stackshot_ctx.sc_num_buffers = is_singlethreaded ? 1 : ml_get_cluster_count();
2792 #else /* __AMP__ */
2793 		stackshot_ctx.sc_num_buffers = 1;
2794 #endif /* !__AMP__ */
2795 
2796 		/*
2797 		 * Set all buffer sizes to zero. We'll use ssb_size to track how many CPUs in
2798 		 * that cluster are participating in the stackshot.
2799 		 */
2800 		bzero(stackshot_ctx.sc_buffers, sizeof(stackshot_ctx.sc_buffers));
2801 
2802 		/* Setup per-cpu state */
2803 		percpu_foreach_base(base) {
2804 			*PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu) = (struct stackshot_cpu_context) { 0 };
2805 		}
2806 
2807 		if (is_singlethreaded) {
2808 			/* If the stackshot is singlethreaded, set up the kcdata - we don't bother with linked-list kcdata in singlethreaded mode. */
2809 			uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2810 			    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2811 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2812 			kcdata_memory_static_init(stackshot_kcdata_p, (mach_vm_address_t) stackshot_args.buffer, hdr_tag,
2813 			    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
2814 			if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
2815 				hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2816 				    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2817 				kcd_exit_on_error(kcdata_init_compress(stackshot_kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
2818 			}
2819 			stackshot_cpu_ctx.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES);
2820 		}
2821 	} else {
2822 		/*
2823 		 * If this is a panic stackshot, we need to handle things differently.
2824 		 * The panic code hands us a kcdata descriptor to work with instead of
2825 		 * us making one ourselves.
2826 		 */
2827 		*stackshot_kcdata_p = *stackshot_args.descriptor;
2828 		stackshot_cpu_ctx = (struct stackshot_cpu_context) {
2829 			.scc_can_work = true,
2830 			.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES)
2831 		};
2832 #if STACKSHOT_COLLECTS_LATENCY_INFO
2833 		*(PERCPU_GET(stackshot_trace_buffer)) = (struct stackshot_trace_buffer) {};
2834 #endif
2835 	}
2836 
2837 	/* Set up our cpu state */
2838 	stackshot_cpu_preflight();
2839 
2840 error_exit:
2841 	return error;
2842 }
2843 
2844 /*
2845  * The old function signature for kdp_snapshot_preflight, used in the panic path.
2846  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2847  */
2848 void
2849 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
2850     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
2851 {
2852 	__assert_only kern_return_t err;
2853 	err = kdp_snapshot_preflight_internal((struct kdp_snapshot_args) {
2854 		.pid = pid,
2855 		.buffer = tracebuf,
2856 		.buffer_size = tracebuf_size,
2857 		.flags = flags,
2858 		.descriptor = data_p,
2859 		.since_timestamp = since_timestamp,
2860 		.pagetable_mask = pagetable_mask
2861 	});
2862 
2863 
2864 	/* This shouldn't ever return an error in the panic path. */
2865 	assert(err == KERN_SUCCESS);
2866 }
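/*
 * Usage sketch (hypothetical flag combination, not taken from a caller in
 * this file): the panic path packs the legacy argument list into a
 * struct kdp_snapshot_args, e.g.
 *
 *     kdp_snapshot_preflight(-1, buf, bufsize,
 *         STACKSHOT_FROM_PANIC | STACKSHOT_SAVE_KEXT_LOADINFO,
 *         kcdata_p, 0, 0);
 *
 * which is equivalent to calling kdp_snapshot_preflight_internal() with the
 * corresponding designated initializers above.
 */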
2867 
2868 static void
2869 stackshot_reset_state(void)
2870 {
2871 	stackshot_ctx = (struct stackshot_context) { 0 };
2872 }
2873 
2874 void
2875 panic_stackshot_reset_state(void)
2876 {
2877 	stackshot_reset_state();
2878 }
2879 
2880 boolean_t
2881 stackshot_active(void)
2882 {
2883 	return os_atomic_load(&stackshot_ctx.sc_state, relaxed) != SS_INACTIVE;
2884 }
2885 
2886 boolean_t
2887 panic_stackshot_active(void)
2888 {
2889 	return os_atomic_load(&stackshot_contexts[STACKSHOT_CTX_IDX_PANIC].sc_state, relaxed) != SS_INACTIVE;
2890 }
2891 
2892 uint32_t
2893 kdp_stack_snapshot_bytes_traced(void)
2894 {
2895 	return stackshot_ctx.sc_bytes_traced;
2896 }
2897 
2898 uint32_t
2899 kdp_stack_snapshot_bytes_uncompressed(void)
2900 {
2901 	return stackshot_ctx.sc_bytes_uncompressed;
2902 }
2903 
2904 static boolean_t
2905 memory_iszero(void *addr, size_t size)
2906 {
2907 	char *data = (char *)addr;
2908 	for (size_t i = 0; i < size; i++) {
2909 		if (data[i] != 0) {
2910 			return FALSE;
2911 		}
2912 	}
2913 	return TRUE;
2914 }
2915 
2916 static void
2917 _stackshot_validation_reset(void)
2918 {
2919 	percpu_foreach_base(base) {
2920 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2921 		cpu_ctx->scc_validation_state.last_valid_page_kva = -1;
2922 		cpu_ctx->scc_validation_state.last_valid_size = 0;
2923 	}
2924 }
2925 
2926 static bool
2927 _stackshot_validate_kva(vm_offset_t addr, size_t size)
2928 {
2929 	vm_offset_t page_addr = atop_kernel(addr);
2930 	if (stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva == page_addr &&
2931 	    stackshot_cpu_ctx.scc_validation_state.last_valid_size <= size) {
2932 		return true;
2933 	}
2934 
2935 	if (ml_validate_nofault(addr, size)) {
2936 		stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva = page_addr;
2937 		stackshot_cpu_ctx.scc_validation_state.last_valid_size = size;
2938 		return true;
2939 	}
2940 	return false;
2941 }
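/*
 * Note: scc_validation_state is a one-entry, per-CPU cache keyed by the page
 * of the last successful ml_validate_nofault() call, so repeated probes of
 * the same page (e.g. the byte-at-a-time scan in _stackshot_strlen() below)
 * can skip revalidation.
 */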
2942 
2943 static long
2944 _stackshot_strlen(const char *s, size_t maxlen)
2945 {
2946 	size_t len = 0;
2947 	for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
2948 		if (*s == 0) {
2949 			return len;
2950 		}
2951 		if (len >= maxlen) {
2952 			return -1;
2953 		}
2954 	}
2955 	return -1; /* failed before end of string */
2956 }
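/*
 * Minimal usage sketch (hypothetical caller): a negative return means the
 * string was unmapped or unterminated within maxlen, so it must be treated
 * as unreadable rather than assumed to carry a terminator.
 *
 *     long n = _stackshot_strlen(name, max_namelen);
 *     if (n > 0) {
 *             // safe to copy n + 1 bytes of name
 *     }
 */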
2957 
2958 
2959 static size_t
2960 stackshot_plh_est_size(void)
2961 {
2962 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2963 	size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
2964 
2965 	if (size == 0) {
2966 		return 0;
2967 	}
2968 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
2969 	return SIZE_EST(size * sizeof(*plh->plh_array)) +
2970 	       SIZE_EST(size * sizeof(*plh->plh_chains)) +
2971 	       SIZE_EST(size * sizeof(*stackshot_cpu_ctx.scc_plh_gen.pgs_gen) * real_ncpus) +
2972 	       SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
2973 #undef SIZE_EST
2974 }
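/*
 * The estimate mirrors the allocations made in stackshot_plh_setup() below:
 * one plh_array slot and one plh_chains slot per entry, one pgs_gen
 * generation slot per entry per CPU, and 2^STACKSHOT_PLH_SHIFT hash buckets,
 * with each term rounded up to pointer alignment by SIZE_EST().
 */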
2975 
2976 static void
2977 stackshot_plh_reset(void)
2978 {
2979 	stackshot_ctx.sc_plh = (struct port_label_hash){.plh_size = 0};  /* structure assignment */
2980 }
2981 
2982 static kern_return_t
2983 stackshot_plh_setup(void)
2984 {
2985 	kern_return_t error;
2986 	size_t size;
2987 	bool percpu_alloc_failed = false;
2988 	struct port_label_hash plh = {
2989 		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
2990 		.plh_count = 0,
2991 	};
2992 
2993 	stackshot_plh_reset();
2994 
2995 	percpu_foreach_base(base) {
2996 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2997 		cpu_ctx->scc_plh_gen = (struct _stackshot_plh_gen_state){
2998 			.pgs_gen = NULL,
2999 			.pgs_curgen = 1,
3000 			.pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX,
3001 			.pgs_curgen_max = 0,
3002 		};
3003 	}
3004 
3005 	size = plh.plh_size;
3006 	if (size == 0) {
3007 		return KERN_SUCCESS;
3008 	}
3009 	plh.plh_array = stackshot_alloc_with_size(size * sizeof(*plh.plh_array), &error);
3010 	plh.plh_chains = stackshot_alloc_with_size(size * sizeof(*plh.plh_chains), &error);
3011 	percpu_foreach_base(base) {
3012 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
3013 		cpu_ctx->scc_plh_gen.pgs_gen = stackshot_alloc_with_size(size * sizeof(*cpu_ctx->scc_plh_gen.pgs_gen), &error);
3014 		if (cpu_ctx->scc_plh_gen.pgs_gen == NULL) {
3015 			percpu_alloc_failed = true;
3016 			break;
3017 		}
3018 		for (int x = 0; x < size; x++) {
3019 			cpu_ctx->scc_plh_gen.pgs_gen[x] = 0;
3020 		}
3021 	}
3022 	plh.plh_hash = stackshot_alloc_with_size((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash), &error);
3023 	if (error != KERN_SUCCESS) {
3024 		return error;
3025 	}
3026 	if (plh.plh_array == NULL || plh.plh_chains == NULL || percpu_alloc_failed || plh.plh_hash == NULL) {
3027 		PLH_STAT_OP(os_atomic_inc(&stackshot_ctx.sc_plh.plh_bad, relaxed));
3028 		return KERN_SUCCESS;
3029 	}
3030 	for (int x = 0; x < size; x++) {
3031 		plh.plh_array[x] = NULL;
3032 		plh.plh_chains[x] = -1;
3033 	}
3034 	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
3035 		plh.plh_hash[x] = -1;
3036 	}
3037 	stackshot_ctx.sc_plh = plh;  /* structure assignment */
3038 	return KERN_SUCCESS;
3039 }
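/*
 * Allocation failure above is deliberately non-fatal: sc_plh stays reset
 * with plh_size == 0, plh_bad is bumped for the stats, and later lookups
 * degrade to STACKSHOT_PORTLABELID_MISSING instead of aborting the
 * stackshot.
 */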
3040 
3041 static int16_t
3042 stackshot_plh_hash(struct ipc_service_port_label *ispl)
3043 {
3044 	uintptr_t ptr = VM_KERNEL_STRIP_PTR((uintptr_t)ispl);
3045 
3046 	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
3047 #define PLH_HASH_STEP(ptr, x) \
3048 	    ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
3049 	ptr ^= PLH_HASH_STEP(ptr, 16);
3050 	ptr ^= PLH_HASH_STEP(ptr, 8);
3051 	ptr ^= PLH_HASH_STEP(ptr, 4);
3052 	ptr ^= PLH_HASH_STEP(ptr, 2);
3053 	ptr ^= PLH_HASH_STEP(ptr, 1);
3054 #undef PLH_HASH_STEP
3055 	return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
3056 }
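/*
 * XOR-folding example (assuming STACKSHOT_PLH_SHIFT == 7; illustrative
 * only): the steps reduce to ptr ^= ptr >> 56, ptr ^= ptr >> 28,
 * ptr ^= ptr >> 14, ptr ^= ptr >> 7 (the 16-step shift of 112 bits is out
 * of range for a 64-bit pointer and folds to 0), after which the low 7 bits
 * select one of 128 buckets.
 */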
3057 
3058 enum stackshot_plh_lookup_type {
3059 	STACKSHOT_PLH_LOOKUP_UNKNOWN,
3060 	STACKSHOT_PLH_LOOKUP_SEND,
3061 	STACKSHOT_PLH_LOOKUP_RECEIVE,
3062 };
3063 
3064 static void
3065 stackshot_plh_resetgen(void)
3066 {
3067 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3068 	uint16_t plh_size = stackshot_ctx.sc_plh.plh_size;
3069 
3070 	if (pgs->pgs_curgen_min == STACKSHOT_PLH_SIZE_MAX && pgs->pgs_curgen_max == 0) {
3071 		return;  // no lookups, nothing using the current generation
3072 	}
3073 	pgs->pgs_curgen++;
3074 	pgs->pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX;
3075 	pgs->pgs_curgen_max = 0;
3076 	if (pgs->pgs_curgen == 0) { // wrapped, zero the array and increment the generation
3077 		for (int x = 0; x < plh_size; x++) {
3078 			pgs->pgs_gen[x] = 0;
3079 		}
3080 		pgs->pgs_curgen = 1;
3081 	}
3082 }
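/*
 * Generation bookkeeping: each lookup stamps pgs_gen[idx] with the current
 * generation, and kdp_stackshot_plh_record_locked() only emits entries whose
 * stamp matches. Bumping pgs_curgen here invalidates every stamp at once;
 * the wrap-to-zero case rewrites the array because stale stamps could
 * otherwise alias a reused generation number.
 */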
3083 
3084 static int16_t
3085 stackshot_plh_lookup_locked(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3086 {
3087 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3088 	int depth;
3089 	int16_t cur;
3090 	if (ispl == NULL) {
3091 		return STACKSHOT_PORTLABELID_NONE;
3092 	}
3093 	switch (type) {
3094 	case STACKSHOT_PLH_LOOKUP_SEND:
3095 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_send, relaxed));
3096 		break;
3097 	case STACKSHOT_PLH_LOOKUP_RECEIVE:
3098 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_receive, relaxed));
3099 		break;
3100 	default:
3101 		break;
3102 	}
3103 	PLH_STAT_OP(os_atomic_inc(&plh->plh_lookups, relaxed));
3104 	if (plh->plh_size == 0) {
3105 		return STACKSHOT_PORTLABELID_MISSING;
3106 	}
3107 	int16_t hash = stackshot_plh_hash(ispl);
3108 	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
3109 	depth = 0;
3110 	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
3111 		/* cur must be in-range, and chain depth can never be above our # allocated */
3112 		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
3113 			PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3114 			PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3115 			return STACKSHOT_PORTLABELID_MISSING;
3116 		}
3117 		assert(cur < plh->plh_count);
3118 		if (plh->plh_array[cur] == ispl) {
3119 			PLH_STAT_OP(os_atomic_inc(&plh->plh_found, relaxed));
3120 			PLH_STAT_OP(os_atomic_add(&plh->plh_found_depth, depth, relaxed));
3121 			goto found;
3122 		}
3123 		depth++;
3124 	}
3125 	/* not found in hash table, so alloc and insert it */
3126 	if (cur != -1) {
3127 		PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3128 		PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3129 		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
3130 	}
3131 	PLH_STAT_OP(os_atomic_inc(&plh->plh_insert, relaxed));
3132 	PLH_STAT_OP(os_atomic_add(&plh->plh_insert_depth, depth, relaxed));
3133 	if (plh->plh_count >= plh->plh_size) {
3134 		return STACKSHOT_PORTLABELID_MISSING; /* no space */
3135 	}
3136 	cur = plh->plh_count;
3137 	plh->plh_count++;
3138 	plh->plh_array[cur] = ispl;
3139 	plh->plh_chains[cur] = plh->plh_hash[hash];
3140 	plh->plh_hash[hash] = cur;
3141 found:  ;
3142 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3143 	pgs->pgs_gen[cur] = pgs->pgs_curgen;
3144 	if (pgs->pgs_curgen_min > cur) {
3145 		pgs->pgs_curgen_min = cur;
3146 	}
3147 	if (pgs->pgs_curgen_max < cur) {
3148 		pgs->pgs_curgen_max = cur;
3149 	}
3150 	return cur + 1;   /* offset to avoid 0 */
3151 }
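/*
 * Layout sketch (illustrative indices): plh_hash[h] holds the head index of
 * bucket h's chain, plh_chains[i] the next index, and -1 terminates.
 * Inserting labels at indices 0, 1, then 2, all hashing to bucket 5, yields
 *
 *     plh_hash[5] == 2, plh_chains[2] == 1, plh_chains[1] == 0,
 *     plh_chains[0] == -1
 *
 * since new entries are prepended. The returned port label ID is the array
 * index + 1, keeping 0 reserved.
 */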
3152 
3153 static kern_return_t
3154 kdp_stackshot_plh_record_locked(void)
3155 {
3156 	kern_return_t error = KERN_SUCCESS;
3157 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3158 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3159 	uint16_t count = plh->plh_count;
3160 	uint8_t curgen = pgs->pgs_curgen;
3161 	int16_t curgen_min = pgs->pgs_curgen_min;
3162 	int16_t curgen_max = pgs->pgs_curgen_max;
3163 	if (curgen_min <= curgen_max && curgen_max < count &&
3164 	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
3165 		struct ipc_service_port_label **arr = plh->plh_array;
3166 		size_t ispl_size, max_namelen;
3167 		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
3168 		for (int idx = curgen_min; idx <= curgen_max; idx++) {
3169 			struct ipc_service_port_label *ispl = arr[idx];
3170 			struct portlabel_info spl = {
3171 				.portlabel_id = (idx + 1),
3172 			};
3173 			const char *name = NULL;
3174 			long name_sz = 0;
3175 			if (pgs->pgs_gen[idx] != curgen) {
3176 				continue;
3177 			}
3178 			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
3179 				kdp_ipc_fill_splabel(ispl, &spl, &name);
3180 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
3181 			} else {
3182 				if (ispl != NULL && (vm_offset_t)ispl >> 48 == 0x0000) {
3183 					ca_event_t event_to_send = os_atomic_xchg(&rdar_126582377_event, NULL, relaxed);
3184 					if (event_to_send) {
3185 						CA_EVENT_SEND(event_to_send);
3186 					}
3187 				}
3188 #endif
3189 			}
3190 
3191 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3192 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3193 			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) {   /* validates the kva */
3194 				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
3195 			} else {
3196 				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
3197 			}
3198 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
3199 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3200 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3201 		}
3202 	}
3203 
3204 error_exit:
3205 	return error;
3206 }
3207 
3208 // record any PLH referenced since the last stackshot_plh_resetgen() call
3209 static kern_return_t
3210 kdp_stackshot_plh_record(void)
3211 {
3212 	kern_return_t error;
3213 	plh_lock(&stackshot_ctx.sc_plh);
3214 	error = kdp_stackshot_plh_record_locked();
3215 	plh_unlock(&stackshot_ctx.sc_plh);
3216 	return error;
3217 }
3218 
3219 static int16_t
3220 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3221 {
3222 	int16_t result;
3223 	plh_lock(&stackshot_ctx.sc_plh);
3224 	result = stackshot_plh_lookup_locked(ispl, type);
3225 	plh_unlock(&stackshot_ctx.sc_plh);
3226 	return result;
3227 }
3228 
3229 #if DEVELOPMENT || DEBUG
3230 static kern_return_t
3231 kdp_stackshot_plh_stats(void)
3232 {
3233 	kern_return_t error = KERN_SUCCESS;
3234 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3235 
3236 #define PLH_STAT(x) do { if (os_atomic_load(&plh->x, relaxed) != 0) { \
3237 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, os_atomic_load(&plh->x, relaxed), "stackshot_" #x)); \
3238 } } while (0)
3239 	PLH_STAT(plh_size);
3240 	PLH_STAT(plh_lookups);
3241 	PLH_STAT(plh_found);
3242 	PLH_STAT(plh_found_depth);
3243 	PLH_STAT(plh_insert);
3244 	PLH_STAT(plh_insert_depth);
3245 	PLH_STAT(plh_bad);
3246 	PLH_STAT(plh_bad_depth);
3247 	PLH_STAT(plh_lookup_send);
3248 	PLH_STAT(plh_lookup_receive);
3249 #undef PLH_STAT
3250 
3251 error_exit:
3252 	return error;
3253 }
3254 #endif /* DEVELOPMENT || DEBUG */
3255 
3256 /*
3257  * This function can be called from stackshot / kdp context or
3258  * from telemetry / current task context
3259  */
3260 uint64_t
3261 kcdata_get_task_ss_flags(task_t task, bool from_stackshot)
3262 {
3263 	uint64_t ss_flags = 0;
3264 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3265 	void *bsd_info = get_bsdtask_info(task);
3266 
3267 	if (task_64bit_addr) {
3268 		ss_flags |= kUser64_p;
3269 	}
3270 	if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
3271 		ss_flags |= kTerminatedSnapshot;
3272 	}
3273 	if (task->pidsuspended) {
3274 		ss_flags |= kPidSuspended;
3275 	}
3276 	if (task->frozen) {
3277 		ss_flags |= kFrozen;
3278 	}
3279 	if (task->effective_policy.tep_darwinbg == 1) {
3280 		ss_flags |= kTaskDarwinBG;
3281 	}
3282 	if (task->requested_policy.trp_ext_darwinbg == 1) {
3283 		ss_flags |= kTaskExtDarwinBG;
3284 	}
3285 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
3286 		ss_flags |= kTaskIsForeground;
3287 	}
3288 	if (task->requested_policy.trp_boosted == 1) {
3289 		ss_flags |= kTaskIsBoosted;
3290 	}
3291 	if (task->effective_policy.tep_sup_active == 1) {
3292 		ss_flags |= kTaskIsSuppressed;
3293 	}
3294 #if CONFIG_MEMORYSTATUS
3295 
3296 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
3297 	boolean_t is_active = FALSE, is_managed = FALSE, has_assertion = FALSE;
3298 	memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit, &is_active, &is_managed, &has_assertion);
3299 	if (dirty) {
3300 		ss_flags |= kTaskIsDirty;
3301 	}
3302 	if (dirty_tracked) {
3303 		ss_flags |= kTaskIsDirtyTracked;
3304 	}
3305 	if (allow_idle_exit) {
3306 		ss_flags |= kTaskAllowIdleExit;
3307 	}
3308 	if (is_active) {
3309 		ss_flags |= kTaskIsActive;
3310 	}
3311 	if (is_managed) {
3312 		ss_flags |= kTaskIsManaged;
3313 	}
3314 	if (has_assertion) {
3315 		ss_flags |= kTaskHasAssertion;
3316 	}
3317 
3318 #endif
3319 	if (task->effective_policy.tep_tal_engaged) {
3320 		ss_flags |= kTaskTALEngaged;
3321 	}
3322 
3323 	if (from_stackshot) {
3324 		ss_flags |= workqueue_get_task_ss_flags_from_pwq_state_kdp(bsd_info);
3325 	}
3326 
3327 #if IMPORTANCE_INHERITANCE
3328 	if (task->task_imp_base) {
3329 		if (task->task_imp_base->iit_donor) {
3330 			ss_flags |= kTaskIsImpDonor;
3331 		}
3332 		if (task->task_imp_base->iit_live_donor) {
3333 			ss_flags |= kTaskIsLiveImpDonor;
3334 		}
3335 	}
3336 #endif
3337 
3338 	if (task->effective_policy.tep_runaway_mitigation) {
3339 		ss_flags |= kTaskRunawayMitigation;
3340 	}
3341 
3342 	if (task->t_flags & TF_TELEMETRY) {
3343 		ss_flags |= kTaskRsrcFlagged;
3344 	}
3345 
3346 	return ss_flags;
3347 }
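/*
 * Example (flag composition only; the combination is hypothetical): a
 * 64-bit task that is pidsuspended and frozen reports at least
 * kUser64_p | kPidSuspended | kFrozen in its snapshot ss_flags.
 */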
3348 
3349 static kern_return_t
3350 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
3351 {
3352 	kern_return_t error = KERN_SUCCESS;
3353 
3354 	uint64_t shared_cache_slide = 0;
3355 	uint64_t shared_cache_first_mapping = 0;
3356 	uint32_t shared_cache_id = 0;
3357 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
3358 
3359 
3360 	assert(task_snap_ss_flags != NULL);
3361 
3362 	/* Get basic info about the shared region pointer, regardless of any failures */
3363 	if (task->shared_region == NULL) {
3364 		*task_snap_ss_flags |= kTaskSharedRegionNone;
3365 	} else if (task->shared_region == primary_system_shared_region) {
3366 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
3367 	} else {
3368 		*task_snap_ss_flags |= kTaskSharedRegionOther;
3369 	}
3370 
3371 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
3372 		struct vm_shared_region *sr = task->shared_region;
3373 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
3374 
3375 		shared_cache_id = sr->sr_id;
3376 	} else {
3377 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3378 		goto error_exit;
3379 	}
3380 
3381 	/*
3382 	 * We haven't copied in the shared region UUID yet as part of setup.
3383 	 * This seems to happen infrequently with DriverKit processes on certain
3384 	 * configurations, even once the process has already been set up.
3385 	 * rdar://139753101
3386 	 */
3387 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
3388 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3389 		goto error_exit;
3390 	}
3391 
3392 
3393 	/*
3394 	 * No refcounting here, but we are in debugger context, so that should be safe.
3395 	 */
3396 	shared_cache_slide = task->shared_region->sr_slide;
3397 
3398 	if (task->shared_region == primary_system_shared_region) {
3399 		/* skip adding shared cache info -- it's the same as the system level one */
3400 		goto error_exit;
3401 	}
3402 	/*
3403 	 * New-style shared cache reference: for non-primary shared regions,
3404 	 * just include the ID of the shared cache we're attached to.  Consumers
3405 	 * should use the following info from the task's ts_ss_flags as well:
3406 	 *
3407 	 * kTaskSharedRegionNone - task is not attached to a shared region
3408 	 * kTaskSharedRegionSystem - task is attached to the shared region
3409 	 *     with kSharedCacheSystemPrimary set in sharedCacheFlags.
3410 	 * kTaskSharedRegionOther - task is attached to the shared region with
3411 	 *     sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
3412 	 */
3413 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
3414 
3415 	/*
3416 	 * For backwards compatibility; this should eventually be removed.
3417 	 *
3418 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3419 	 * naming of both the structure and fields for this use wasn't great.  The
3420 	 * dyld_shared_cache_loadinfo structure has better names, but the same
3421 	 * layout and content as the original.
3422 	 *
3423 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3424 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3425 	 * entries; here, it's the slid first mapping, and we leave it that way
3426 	 * for backwards compatibility.
3427 	 */
3428 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
3429 	kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
3430 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
3431 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
3432 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
3433 
3434 error_exit:
3435 	return error;
3436 }
3437 
3438 static kern_return_t
3439 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
3440 {
3441 	bool save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
3442 	bool save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
3443 	bool save_compactinfo_p      = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
3444 	bool should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
3445 
3446 	kern_return_t error        = KERN_SUCCESS;
3447 	mach_vm_address_t out_addr = 0;
3448 
3449 	mach_vm_address_t dyld_compactinfo_addr = 0;
3450 	uint32_t dyld_compactinfo_size = 0;
3451 
3452 	uint32_t uuid_info_count         = 0;
3453 	mach_vm_address_t uuid_info_addr = 0;
3454 	uint64_t uuid_info_timestamp     = 0;
3455 	#pragma unused(uuid_info_timestamp)
3456 	kdp_fault_result_flags_t kdp_fault_results = 0;
3457 
3458 
3459 	assert(task_snap_ss_flags != NULL);
3460 
3461 	int task_pid     = pid_from_task(task);
3462 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3463 
3464 	if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
3465 		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
3466 		if (task_64bit_addr) {
3467 			struct user64_dyld_all_image_infos task_image_infos;
3468 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3469 			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3470 				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
3471 				uuid_info_addr = task_image_infos.uuidArray;
3472 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3473 					uuid_info_timestamp = task_image_infos.timestamp;
3474 				}
3475 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3476 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3477 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3478 				}
3479 
3480 			}
3481 		} else {
3482 			struct user32_dyld_all_image_infos task_image_infos;
3483 			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3484 			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3485 				uuid_info_count = task_image_infos.uuidArrayCount;
3486 				uuid_info_addr = task_image_infos.uuidArray;
3487 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3488 					uuid_info_timestamp = task_image_infos.timestamp;
3489 				}
3490 				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3491 					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3492 					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3493 				}
3494 			}
3495 		}
3496 
3497 		/*
3498 		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
3499 		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
3500 		 * for this task.
3501 		 */
3502 		if (!uuid_info_addr) {
3503 			uuid_info_count = 0;
3504 		}
3505 
3506 		if (!dyld_compactinfo_addr) {
3507 			dyld_compactinfo_size = 0;
3508 		}
3509 
3510 	}
3511 
3512 	if (have_pmap && task_pid == 0) {
3513 		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
3514 			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
3515 		} else {
3516 			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
3517 		}
3518 	}
3519 
3520 	if (save_compactinfo_p && task_pid > 0) {
3521 		if (dyld_compactinfo_size == 0) {
3522 			*task_snap_ss_flags |= kTaskDyldCompactInfoNone;
3523 		} else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
3524 			*task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
3525 		} else {
3526 			kdp_fault_result_flags_t ci_kdp_fault_results = 0;
3527 
3528 			/* Open a compression window to avoid overflowing the stack */
3529 			kcdata_compression_window_open(kcd);
3530 			kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
3531 			    dyld_compactinfo_size, &out_addr));
3532 
3533 			if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
3534 			    dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
3535 				bzero((void *)out_addr, dyld_compactinfo_size);
3536 			}
3537 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3538 				*task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
3539 			}
3540 
3541 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3542 				*task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
3543 			}
3544 
3545 			if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3546 				*task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
3547 			}
3548 
3549 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3550 		}
3551 	}
3552 	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
3553 		uint32_t copied_uuid_count = 0;
3554 		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
3555 		uint32_t uuid_info_array_size = 0;
3556 
3557 		/* Open a compression window to avoid overflowing the stack */
3558 		kcdata_compression_window_open(kcd);
3559 
3560 		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
3561 		if (uuid_info_count > 0) {
3562 			uuid_info_array_size = uuid_info_count * uuid_info_size;
3563 
3564 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3565 			    uuid_info_size, uuid_info_count, &out_addr));
3566 
3567 			if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
3568 				bzero((void *)out_addr, uuid_info_array_size);
3569 			} else {
3570 				copied_uuid_count = uuid_info_count;
3571 			}
3572 		}
3573 
3574 		uuid_t binary_uuid;
3575 		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
3576 			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
3577 			if (uuid_info_array_size == 0) {
3578 				/* We just need to store one UUID */
3579 				uuid_info_array_size = uuid_info_size;
3580 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3581 				    uuid_info_size, 1, &out_addr));
3582 			}
3583 
3584 			if (task_64bit_addr) {
3585 				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
3586 				uint64_t image_load_address = task->mach_header_vm_address;
3587 
3588 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3589 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3590 			} else {
3591 				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
3592 				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
3593 
3594 				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3595 				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3596 			}
3597 		}
3598 
3599 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
3600 	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
3601 		uintptr_t image_load_address;
3602 
3603 		do {
3604 #if defined(__arm64__)
3605 			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
3606 				struct dyld_uuid_info_64 kc_uuid = {0};
3607 				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3608 				kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
3609 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
3610 				break;
3611 			}
3612 #endif /* defined(__arm64__) */
3613 
3614 			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
3615 				/* Kernel UUID not found or inaccessible */
3616 				break;
3617 			}
3618 
3619 			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
3620 			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
3621 				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
3622 #if  defined(__arm64__)
3623 				kc_format_t primary_kc_type = KCFormatUnknown;
3624 				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
3625 					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
3626 					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
3627 				}
3628 #endif
3629 			}
3630 
3631 			/*
3632 			 * The element count of the array can vary - avoid overflowing the
3633 			 * stack by opening a window.
3634 			 */
3635 			kcdata_compression_window_open(kcd);
3636 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
3637 			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
3638 			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
3639 
3640 			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
3641 #if defined(__arm64__)
3642 			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3643 				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3644 				extern vm_offset_t segTEXTEXECB;
3645 				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
3646 			}
3647 #endif
3648 			uuid_info_array[0].imageLoadAddress = image_load_address;
3649 			kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
3650 
3651 			if (save_kextloadinfo_p &&
3652 			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
3653 			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
3654 			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
3655 				uint32_t kexti;
3656 				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
3657 					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
3658 #if defined(__arm64__)
3659 					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3660 						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3661 						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
3662 					}
3663 #endif
3664 					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
3665 					kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
3666 				}
3667 			}
3668 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3669 		} while (0);
3670 	}
3671 
3672 error_exit:
3673 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3674 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
3675 	}
3676 
3677 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3678 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3679 	}
3680 
3681 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3682 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3683 	}
3684 
3685 	return error;
3686 }
3687 
3688 uint64_t kdp_task_exec_meta_flags(task_t task);
3689 
3690 uint64_t
3691 kdp_task_exec_meta_flags(task_t task)
3692 {
3693 	uint64_t flags = 0;
3694 
3695 #if CONFIG_ROSETTA
3696 	if (task_is_translated(task)) {
3697 		flags |= kTaskExecTranslated;
3698 	}
3699 #endif /* CONFIG_ROSETTA */
3700 
3701 	if (task_has_hardened_heap(task)) {
3702 		flags |= kTaskExecHardenedHeap;
3703 	}
3704 
3705 #if (HAS_MTE || HAS_MTE_EMULATION_SHIMS) && APPLE_FEATURE_MTE
3706 	if (task_has_sec(task)) {
3707 		flags |= kTaskExecSec;
3708 
3709 		if (task_has_sec_inherit(task)) {
3710 			flags |= kTaskExecSecInherit;
3711 		}
3712 
3713 		if (task_has_sec_user_data(task)) {
3714 			flags |= kTaskExecSecUserData;
3715 		}
3716 #if HAS_MTE
3717 		if (task_has_sec_soft_mode(task)) {
3718 			flags |= kTaskExecSecSoftMode;
3719 		}
3720 #endif /* HAS_MTE */
3721 	}
3722 
3723 #endif /* (HAS_MTE || HAS_MTE_EMULATION_SHIMS) && APPLE_FEATURE_MTE */
3724 
3725 	return flags;
3726 }
3727 
3728 /* Compute the set of flags that kdp_task_exec_meta_flags can return based on the kernel config */
3729 static uint64_t
3730 stackshot_available_task_exec_flags(void)
3731 {
3732 	uint64_t flags_mask = 0;
3733 
3734 #if CONFIG_ROSETTA
3735 	flags_mask |= kTaskExecTranslated;
3736 #endif /* CONFIG_ROSETTA */
3737 
3738 	flags_mask |= kTaskExecHardenedHeap;
3739 
3740 #if (HAS_MTE || HAS_MTE_EMULATION_SHIMS) && APPLE_FEATURE_MTE
3741 	flags_mask |= kTaskExecSec;
3742 	flags_mask |= kTaskExecSecUserData;
3743 	flags_mask |= kTaskExecSecSoftMode;
3744 	flags_mask |= kTaskExecSecInherit;
3745 #endif /* (HAS_MTE || HAS_MTE_EMULATION_SHIMS) && APPLE_FEATURE_MTE */
3746 
3747 	return flags_mask;
3748 }
3749 
3750 static kern_return_t
3751 kcdata_record_task_exec_meta(kcdata_descriptor_t kcd, task_t task)
3752 {
3753 	struct task_exec_meta tem = {};
3754 	kern_return_t error = KERN_SUCCESS;
3755 
3756 	tem.tem_flags = kdp_task_exec_meta_flags(task);
3757 
3758 	if (tem.tem_flags != 0) {
3759 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_EXEC_META, sizeof(struct task_exec_meta), &tem));
3760 	}
3761 
3762 error_exit:
3763 	return error;
3764 }
3765 
3766 static kern_return_t
3767 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
3768 {
3769 	kern_return_t error = KERN_SUCCESS;
3770 	mach_vm_address_t out_addr = 0;
3771 
3772 	/* I/O Statistics if any counters are non zero */
3773 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3774 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
3775 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
3776 		kcdata_compression_window_open(kcd);
3777 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3778 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3779 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
3780 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
3781 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
3782 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
3783 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
3784 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
3785 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
3786 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
3787 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
3788 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
3789 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
3790 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
3791 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
3792 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
3793 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
3794 		}
3795 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
3796 	}
3797 
3798 
3799 error_exit:
3800 	return error;
3801 }
3802 
3803 #if CONFIG_PERVASIVE_CPI
3804 static kern_return_t
3805 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
3806 {
3807 	struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
3808 	struct recount_usage usage = { 0 };
3809 	struct recount_usage perf_only = { 0 };
3810 	recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
3811 	instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
3812 	instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
3813 	instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
3814 	instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);
3815 
3816 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
3817 }
3818 #endif /* CONFIG_PERVASIVE_CPI */
3819 
3820 static kern_return_t
3821 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
3822 {
3823 	struct stackshot_cpu_architecture cpu_architecture = {0};
3824 	int32_t cputype;
3825 	int32_t cpusubtype;
3826 
3827 	proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
3828 	cpu_architecture.cputype = cputype;
3829 	cpu_architecture.cpusubtype = cpusubtype;
3830 
3831 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
3832 }
3833 
3834 static kern_return_t
3835 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
3836 {
3837 	struct stackshot_task_codesigning_info codesigning_info = {};
3838 	void * bsdtask_info = NULL;
3839 	uint32_t trust = 0;
3840 	kern_return_t ret = 0;
3841 	pmap_t pmap = get_task_pmap(task);
3842 	uint64_t cs_auxiliary_info = 0;
3843 	if (task != kernel_task) {
3844 		bsdtask_info = get_bsdtask_info(task);
3845 		codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
3846 		ret = get_trust_level_kdp(pmap, &trust);
3847 		if (ret != KERN_SUCCESS) {
3848 			trust = KCDATA_INVALID_CS_TRUST_LEVEL;
3849 		}
3850 		codesigning_info.cs_trust_level = trust;
3851 		cs_auxiliary_info = task_get_cs_auxiliary_info_kdp(task);
3852 	} else {
3853 		return KERN_SUCCESS;
3854 	}
3855 	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
3856 	if (ret != KERN_SUCCESS) {
3857 		return ret;
3858 	}
3859 	return kcdata_push_data(kcd, TASK_CRASHINFO_CS_AUXILIARY_INFO, sizeof(cs_auxiliary_info), &cs_auxiliary_info);
3860 }
3861 
3862 static kern_return_t
3863 kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd, task_t task)
3864 {
3865 	uint64_t jit_start_addr = 0;
3866 	uint64_t jit_end_addr = 0;
3867 	struct crashinfo_jit_address_range range = {};
3868 	kern_return_t ret = 0;
3869 	pmap_t pmap = get_task_pmap(task);
3870 	if (task == kernel_task || NULL == pmap) {
3871 		return KERN_SUCCESS;
3872 	}
3873 	ret = get_jit_address_range_kdp(pmap, (uintptr_t*)&jit_start_addr, (uintptr_t*)&jit_end_addr);
3874 	if (KERN_SUCCESS == ret) {
3875 		range.start_address = jit_start_addr;
3876 		range.end_address = jit_end_addr;
3877 		return kcdata_push_data(kcd, TASK_CRASHINFO_JIT_ADDRESS_RANGE, sizeof(struct crashinfo_jit_address_range), &range);
3878 	} else {
3879 		return KERN_SUCCESS;
3880 	}
3881 }
3882 
3883 #if CONFIG_TASK_SUSPEND_STATS
3884 static kern_return_t
3885 kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
3886 {
3887 	kern_return_t ret = KERN_SUCCESS;
3888 	struct stackshot_suspension_info suspension_info = {};
3889 	task_suspend_stats_data_t suspend_stats;
3890 	task_suspend_source_array_t suspend_sources;
3891 	struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
3892 	int i;
3893 
3894 	if (task == kernel_task) {
3895 		return KERN_SUCCESS;
3896 	}
3897 
3898 	ret = task_get_suspend_stats_kdp(task, &suspend_stats);
3899 	if (ret != KERN_SUCCESS) {
3900 		return ret;
3901 	}
3902 
3903 	suspension_info.tss_count = suspend_stats.tss_count;
3904 	suspension_info.tss_duration = suspend_stats.tss_duration;
3905 	suspension_info.tss_last_end = suspend_stats.tss_last_end;
3906 	suspension_info.tss_last_start = suspend_stats.tss_last_start;
3907 	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
3908 	if (ret != KERN_SUCCESS) {
3909 		return ret;
3910 	}
3911 
3912 	ret = task_get_suspend_sources_kdp(task, suspend_sources);
3913 	if (ret != KERN_SUCCESS) {
3914 		return ret;
3915 	}
3916 
3917 	for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
3918 		suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
3919 		strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspend_sources[i].tss_procname));
3920 		suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
3921 		suspension_sources[i].tss_time = suspend_sources[i].tss_time;
3922 	}
3923 	return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
3924 }
3925 #endif /* CONFIG_TASK_SUSPEND_STATS */
3926 
3927 static kern_return_t
3928 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
3929 {
3930 	kern_return_t error                 = KERN_SUCCESS;
3931 	mach_vm_address_t out_addr          = 0;
3932 	struct transitioning_task_snapshot * cur_tsnap = NULL;
3933 
3934 	int task_pid           = pid_from_task(task);
3935 	/* Is returning -1 OK for a terminating task? */
3936 	uint64_t task_uniqueid = get_task_uniqueid(task);
3937 
3938 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3939 		/*
3940 		 * If this task is in transition due to an exec (it exec'd, or it
3941 		 * is the exec copy of another task), show the pid as negative.
3942 		 */
3943 		task_pid = 0 - task_pid;
3944 	}
3945 
3946 	/* the transitioning_task_snapshot struct is large - avoid overflowing the stack */
3947 	kcdata_compression_window_open(kcd);
3948 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
3949 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
3950 	bzero(cur_tsnap, sizeof(*cur_tsnap));
3951 
3952 	cur_tsnap->tts_unique_pid = task_uniqueid;
3953 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task, true);
3954 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
3955 	cur_tsnap->tts_transition_type = transition_type;
3956 	cur_tsnap->tts_pid = task_pid;
3957 
3958 	/* Add the BSD process identifiers */
3959 	if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
3960 		proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
3961 	} else {
3962 		cur_tsnap->tts_p_comm[0] = '\0';
3963 	}
3964 
3965 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
3966 
3967 error_exit:
3968 	return error;
3969 }
3970 
3971 static kern_return_t
3972 #if STACKSHOT_COLLECTS_LATENCY_INFO
3973 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
3974 #else
3975 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3976 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3977 {
3978 	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3979 	bool collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
3980 #if CONFIG_PERVASIVE_CPI
3981 	bool collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3982 #endif /* CONFIG_PERVASIVE_CPI */
3983 #if __arm64__
3984 	bool collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
3985 #endif
3986 	bool collect_pagetables      = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
3987 
3988 
3989 	kern_return_t error                 = KERN_SUCCESS;
3990 	mach_vm_address_t out_addr          = 0;
3991 	struct task_snapshot_v3 * cur_tsnap = NULL;
3992 #if CONFIG_MEMORYSTATUS
3993 	mach_vm_address_t memorystatus_addr = 0;
3994 	struct task_memorystatus_snapshot *memorystatus_snapshot = NULL;
3995 #endif /* CONFIG_MEMORYSTATUS */
3996 #if STACKSHOT_COLLECTS_LATENCY_INFO
3997 	latency_info->cur_tsnap_latency = mach_absolute_time();
3998 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3999 
4000 	int task_pid           = pid_from_task(task);
4001 	uint64_t task_uniqueid = get_task_uniqueid(task);
4002 	void *bsd_info = get_bsdtask_info(task);
4003 	uint64_t proc_starttime_secs = 0;
4004 
4005 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
4006 		/*
4007 		 * If this task is in transition due to an exec (it exec'd, or it
4008 		 * is the exec copy of another task), show the pid as negative.
4009 		 */
4010 		task_pid = 0 - task_pid;
4011 	}
4012 
4013 	/* the task_snapshot_v3 struct is large - avoid overflowing the stack */
4014 	kcdata_compression_window_open(kcd);
4015 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v3), &out_addr));
4016 	cur_tsnap = (struct task_snapshot_v3 *)out_addr;
4017 	bzero(cur_tsnap, sizeof(*cur_tsnap));
4018 
4019 	cur_tsnap->ts_unique_pid = task_uniqueid;
4020 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task, true);
4021 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
4022 
4023 	struct recount_usage term_usage = { 0 };
4024 	recount_task_terminated_usage(task, &term_usage);
4025 	struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
4026 	cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
4027 	cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
4028 
4029 	proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
4030 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
4031 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
4032 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
4033 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
4034 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
4035 
4036 	cur_tsnap->ts_suspend_count = task->suspend_count;
4037 	cur_tsnap->ts_faults = counter_load(&task->faults);
4038 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
4039 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
4040 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
4041 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
4042 	cur_tsnap->ts_pid = task_pid;
4043 
4044 	/* Add the BSD process identifiers */
4045 	if (task_pid != -1 && bsd_info != NULL) {
4046 		proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
4047 		cur_tsnap->ts_uid = proc_getuid(bsd_info);
4048 		cur_tsnap->ts_gid = proc_getgid(bsd_info);
4049 	} else {
4050 		cur_tsnap->ts_p_comm[0] = '\0';
4051 		cur_tsnap->ts_uid = UINT32_MAX;
4052 		cur_tsnap->ts_gid = UINT32_MAX;
4053 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
4054 		if (task->task_imp_base != NULL) {
4055 			kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
4056 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
4057 		}
4058 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
4059 	}
4060 
4061 #if CONFIG_MEMORYSTATUS
4062 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_MEMORYSTATUS, sizeof(struct task_memorystatus_snapshot), &memorystatus_addr));
4063 	memorystatus_snapshot = (struct task_memorystatus_snapshot *)memorystatus_addr;
4064 	bzero(memorystatus_snapshot, sizeof(*memorystatus_snapshot));
4065 
4066 
4067 	int32_t current_memlimit = 0, effectiveprio = 0, requestedprio = 0, assertionprio = 0;
4068 	proc_memstat_data_kdp(bsd_info, &current_memlimit, &effectiveprio, &requestedprio, &assertionprio);
4069 	memorystatus_snapshot->tms_current_memlimit = current_memlimit;
4070 	memorystatus_snapshot->tms_effectivepriority = effectiveprio;
4071 	memorystatus_snapshot->tms_requestedpriority = requestedprio;
4072 	memorystatus_snapshot->tms_assertionpriority = assertionprio;
4073 #endif /* CONFIG_MEMORYSTATUS */
4074 
4075 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
4076 
4077 #if CONFIG_COALITIONS
4078 	if (task_pid != -1 && bsd_info != NULL &&
4079 	    (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
4080 		/*
4081 		 * The jetsam coalition ID is always saved, even if
4082 		 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
4083 		 */
4084 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
4085 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
4086 	}
4087 #endif /* CONFIG_COALITIONS */
4088 
4089 #if __arm64__
4090 	if (collect_asid && have_pmap) {
4091 		uint32_t asid = PMAP_VASID(task->map->pmap);
4092 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
4093 	}
4094 #endif
4095 
4096 #if STACKSHOT_COLLECTS_LATENCY_INFO
4097 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
4098 	latency_info->pmap_latency = mach_absolute_time();
4099 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4100 
4101 	if (collect_pagetables && have_pmap) {
4102 #if SCHED_HYGIENE_DEBUG
4103 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
4104 		ml_spin_debug_clear_self();
4105 #endif
4106 		assert(stackshot_ctx.sc_is_singlethreaded);
4107 		size_t bytes_dumped = 0;
4108 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stackshot_args.pagetable_mask, &bytes_dumped);
4109 		if (error != KERN_SUCCESS) {
4110 			goto error_exit;
4111 		} else {
4112 			/* Variable size array - better not have it on the stack. */
4113 			kcdata_compression_window_open(kcd);
4114 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
4115 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
4116 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
4117 		}
4118 	}
4119 
4120 #if STACKSHOT_COLLECTS_LATENCY_INFO
4121 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
4122 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
4123 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4124 
4125 #if STACKSHOT_COLLECTS_LATENCY_INFO
4126 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
4127 	latency_info->end_latency = mach_absolute_time();
4128 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4129 
4130 	if (collect_iostats) {
4131 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
4132 	}
4133 
4134 #if CONFIG_PERVASIVE_CPI
4135 	if (collect_instrs_cycles) {
4136 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4137 	}
4138 #endif /* CONFIG_PERVASIVE_CPI */
4139 
4140 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
4141 	kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
4142 	kcd_exit_on_error(kcdata_record_task_jit_address_range(kcd, task));
4143 
4144 #if CONFIG_TASK_SUSPEND_STATS
4145 	kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
4146 #endif /* CONFIG_TASK_SUSPEND_STATS */
4147 
4148 #if STACKSHOT_COLLECTS_LATENCY_INFO
4149 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
4150 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4151 
4152 error_exit:
4153 	return error;
4154 }
4155 
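/*
 * Editor's illustration (not in the original source): the ts_latency_qos
 * encoding above stores LATENCY_QOS_TIER_UNSPECIFIED as-is and otherwise
 * rebuilds the public LATENCY_QOS_TIER_n constant by placing a 0xFF marker in
 * bits 16..23 above the raw tier value. A reader-side decode might look like
 * the hedged sketch below; the example_* name is hypothetical.
 */
#if 0 /* illustrative sketch only -- not compiled */
static uint32_t
example_latency_qos_tier(uint32_t ts_latency_qos)
{
	if (ts_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) {
		return LATENCY_QOS_TIER_UNSPECIFIED;
	}
	assert((ts_latency_qos >> 16) == 0xFF); /* marker for an explicit tier */
	return ts_latency_qos & 0xFFFF;         /* the raw tep_latency_qos value */
}
#endif
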
4156 static kern_return_t
4157 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
4158 {
4159 #if !CONFIG_PERVASIVE_CPI
4160 #pragma unused(trace_flags)
4161 #endif /* !CONFIG_PERVASIVE_CPI */
4162 	kern_return_t error                       = KERN_SUCCESS;
4163 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
4164 	mach_vm_address_t out_addr                = 0;
4165 	(void) trace_flags;
4166 #if __arm64__
4167 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
4168 #endif
4169 #if CONFIG_PERVASIVE_CPI
4170 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4171 #endif /* CONFIG_PERVASIVE_CPI */
4172 
4173 	uint64_t task_uniqueid = get_task_uniqueid(task);
4174 
4175 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
4176 
4177 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
4178 
4179 	cur_tsnap->tds_unique_pid = task_uniqueid;
4180 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task, true);
4181 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
4182 
4183 	struct recount_usage usage = { 0 };
4184 	recount_task_terminated_usage(task, &usage);
4185 	struct recount_times_mach term_times = recount_usage_times_mach(&usage);
4186 
4187 	cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
4188 	cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
4189 
4190 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
4191 
4192 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
4193 	cur_tsnap->tds_suspend_count = task->suspend_count;
4194 	cur_tsnap->tds_faults            = counter_load(&task->faults);
4195 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
4196 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
4197 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
4198 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
4199 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
4200 	    ? LATENCY_QOS_TIER_UNSPECIFIED
4201 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
4202 
4203 #if __arm64__
4204 	if (collect_asid && have_pmap) {
4205 		uint32_t asid = PMAP_VASID(task->map->pmap);
4206 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
4207 		kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
4208 	}
4209 #endif
4210 
4211 #if CONFIG_PERVASIVE_CPI
4212 	if (collect_instrs_cycles) {
4213 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4214 	}
4215 #endif /* CONFIG_PERVASIVE_CPI */
4216 
4217 error_exit:
4218 	return error;
4219 }
4220 
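/*
 * Editor's illustration (not in the original source): a delta snapshot only
 * carries tasks and threads that ran after the previous full stackshot, keyed
 * by tds_unique_pid. A hypothetical reader might merge a delta record over its
 * cached copy roughly as sketched below; example_apply_task_delta is a made-up
 * name.
 */
#if 0 /* illustrative sketch only -- not compiled */
static void
example_apply_task_delta(struct task_delta_snapshot_v2 *cached,
    const struct task_delta_snapshot_v2 *delta)
{
	assert(cached->tds_unique_pid == delta->tds_unique_pid);
	/* overwrite only the mutable fields the delta carries */
	cached->tds_suspend_count = delta->tds_suspend_count;
	cached->tds_faults        = delta->tds_faults;
	cached->tds_pageins       = delta->tds_pageins;
	cached->tds_cow_faults    = delta->tds_cow_faults;
	cached->tds_task_size     = delta->tds_task_size;
}
#endif
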
4221 static kern_return_t
4222 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
4223 {
4224 	kern_return_t error = KERN_SUCCESS;
4225 	mach_vm_address_t out_addr = 0;
4226 
4227 	/* I/O Statistics */
4228 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
4229 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
4230 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
4231 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
4232 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
4233 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
4234 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
4235 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
4236 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
4237 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
4238 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
4239 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
4240 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
4241 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
4242 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
4243 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
4244 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
4245 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
4246 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
4247 		}
4248 	}
4249 
4250 error_exit:
4251 	return error;
4252 }
4253 
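/*
 * Editor's illustration (not in the original source): io_stat_info keeps the
 * reads/paging/metadata counters plus running totals, so the snapshot derives
 * each complementary counter by subtraction, as done above. A hedged sketch of
 * the same derivation (example_* name is hypothetical):
 */
#if 0 /* illustrative sketch only -- not compiled */
static void
example_derived_io_counters(const struct io_stat_info *st)
{
	uint64_t writes_count     = st->total_io.count - st->disk_reads.count;
	uint64_t non_paging_count = st->total_io.count - st->paging.count;
	uint64_t data_count       = st->total_io.count - st->metadata.count;
	(void)writes_count; (void)non_paging_count; (void)data_count;
}
#endif
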
4254 bool
4255 machine_trace_thread_validate_kva(vm_offset_t addr)
4256 {
4257 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
4258 }
4259 
4260 struct _stackshot_backtrace_context {
4261 	vm_map_t sbc_map;
4262 	vm_offset_t sbc_prev_page;
4263 	vm_offset_t sbc_prev_kva;
4264 	uint32_t sbc_flags;
4265 	bool sbc_allow_faulting;
4266 };
4267 
4268 static errno_t
4269 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
4270 {
4271 	struct _stackshot_backtrace_context *ctx = vctx;
4272 	size_t map_page_mask = 0;
4273 	size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
4274 	    &map_page_mask);
4275 	assert(size < map_page_size);
4276 	if (src & (size - 1)) {
4277 		// The source should be aligned to the size passed in, like a stack
4278 		// frame or word.
4279 		return EINVAL;
4280 	}
4281 
4282 	vm_offset_t src_page = src & ~map_page_mask;
4283 	vm_offset_t src_kva = 0;
4284 
4285 	if (src_page != ctx->sbc_prev_page) {
4286 		uint32_t res = 0;
4287 		uint32_t flags = 0;
4288 		vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
4289 		    ctx->sbc_allow_faulting, &res);
4290 
4291 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
4292 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
4293 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
4294 		ctx->sbc_flags |= flags;
4295 		if (src_pa == 0) {
4296 			return EFAULT;
4297 		}
4298 
4299 		src_kva = phystokv(src_pa);
4300 		ctx->sbc_prev_page = src_page;
4301 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
4302 	} else {
4303 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
4304 	}
4305 
4306 #if KASAN
4307 	/*
4308 	 * KASan does not monitor accesses to userspace pages. Therefore, it is
4309 	 * pointless to maintain a shadow map for them. Instead, they are all
4310 	 * mapped to a single, always valid shadow map page. This approach saves
4311 	 * a considerable amount of shadow map pages which are limited and
4312 	 * precious.
4313 	 */
4314 	kasan_notify_address_nopoison(src_kva, size);
4315 #endif
4316 
4317 #if HAS_MTE
4318 	/*
4319 	 * Disable tag checking during the copy operation. While in the general case,
4320 	 * if the src page is tagged, we are dealing with a Swift async stack and
4321 	 * therefore a single tag value, we cannot merely fixup the tag here as
4322 	 * userspace could place any target address to be dereferenced in the
4323 	 * backtrace walk, leading to a Denial of Service. We TCO the access instead
4324 	 * although we should improve this to only TCO loads and not stores.
4325 	 */
4326 	mte_disable_tag_checking();
4327 #endif /* HAS_MTE */
4328 	memcpy(dst, (const void *)src_kva, size);
4329 #if HAS_MTE
4330 	mte_enable_tag_checking();
4331 #endif /* HAS_MTE */
4332 
4333 	return 0;
4334 }
4335 
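/*
 * Editor's illustration (not in the original source): _stackshot_backtrace_copy
 * caches the previous page's translation (sbc_prev_page/sbc_prev_kva) so that
 * consecutive frame reads on the same user page skip the physical lookup, and
 * it rejects unaligned reads with the usual power-of-two mask trick, sketched
 * here (example_is_aligned is a hypothetical name):
 */
#if 0 /* illustrative sketch only -- not compiled */
static bool
example_is_aligned(user_addr_t addr, size_t pow2_size)
{
	/* for pow2_size == 8, (addr & 7) == 0 iff addr is 8-byte aligned */
	return (addr & (pow2_size - 1)) == 0;
}
#endif
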
4336 static kern_return_t
4337 kcdata_record_thread_snapshot(kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
4338 {
4339 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
4340 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4341 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4342 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
4343 #if CONFIG_PERVASIVE_CPI
4344 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4345 #endif /* CONFIG_PERVASIVE_CPI */
4346 	kern_return_t error        = KERN_SUCCESS;
4347 
4348 #if STACKSHOT_COLLECTS_LATENCY_INFO
4349 	struct stackshot_latency_thread latency_info;
4350 	latency_info.cur_thsnap1_latency = mach_absolute_time();
4351 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4352 
4353 	mach_vm_address_t out_addr = 0;
4354 	int saved_count            = 0;
4355 
4356 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
4357 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
4358 
4359 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
4360 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
4361 
4362 	/* Populate the thread snapshot header */
4363 	cur_thread_snap->ths_ss_flags = 0;
4364 	cur_thread_snap->ths_thread_id = thread_tid(thread);
4365 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
4366 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
4367 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
4368 
4369 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4370 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4371 	} else {
4372 		cur_thread_snap->ths_voucher_identifier = 0;
4373 	}
4374 
4375 #if STACKSHOT_COLLECTS_LATENCY_INFO
4376 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
4377 	latency_info.dispatch_serial_latency = mach_absolute_time();
4378 	latency_info.dispatch_label_latency = 0;
4379 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4380 
4381 	cur_thread_snap->ths_dqserialnum = 0;
4382 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
4383 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
4384 		if (dqkeyaddr != 0) {
4385 			uint64_t dqaddr = 0;
4386 			boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
4387 			if (copyin_ok && dqaddr != 0) {
4388 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
4389 				uint64_t dqserialnum = 0;
4390 				copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
4391 				if (copyin_ok) {
4392 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
4393 					cur_thread_snap->ths_dqserialnum = dqserialnum;
4394 				}
4395 
4396 #if STACKSHOT_COLLECTS_LATENCY_INFO
4397 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
4398 				latency_info.dispatch_label_latency = mach_absolute_time();
4399 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4400 
4401 				/* try copying in the queue label */
4402 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
4403 				if (label_offs) {
4404 					uint64_t dqlabeladdr = dqaddr + label_offs;
4405 					uint64_t actual_dqlabeladdr = 0;
4406 
4407 					copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
4408 					if (copyin_ok && actual_dqlabeladdr != 0) {
4409 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
4410 						int len;
4411 
4412 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
4413 						len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
4414 						if (len > 0) {
4415 							mach_vm_address_t label_addr = 0;
4416 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
4417 							kdp_strlcpy((char*)label_addr, &label_buf[0], len);
4418 						}
4419 					}
4420 				}
4421 #if STACKSHOT_COLLECTS_LATENCY_INFO
4422 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
4423 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4424 			}
4425 		}
4426 	}
4427 
4428 #if STACKSHOT_COLLECTS_LATENCY_INFO
4429 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
4430 		latency_info.dispatch_serial_latency = 0;
4431 	}
4432 	latency_info.cur_thsnap2_latency = mach_absolute_time();
4433 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4434 
4435 	struct recount_times_mach times = recount_thread_times(thread);
4436 	cur_thread_snap->ths_user_time = times.rtm_user;
4437 	cur_thread_snap->ths_sys_time = times.rtm_system;
4438 
4439 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
4440 		cur_thread_snap->ths_ss_flags |= kThreadMain;
4441 	}
4442 	if (thread->effective_policy.thep_darwinbg) {
4443 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
4444 	}
4445 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4446 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
4447 	}
4448 	if (thread->suspend_count > 0) {
4449 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
4450 	}
4451 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4452 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
4453 	}
4454 #if CONFIG_EXCLAVES
4455 	/* save exclave thread for later collection */
4456 	if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !stackshot_ctx.sc_panic_stackshot) {
4457 		/* certain threads, like the collector, must never be inspected */
4458 		if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
4459 			uint32_t ctid_index = os_atomic_inc_orig(&stackshot_exclave_inspect_ctid_count, acq_rel);
4460 			if (ctid_index < stackshot_exclave_inspect_ctid_capacity) {
4461 				stackshot_exclave_inspect_ctids[ctid_index] = thread_get_ctid(thread);
4462 			} else {
4463 				os_atomic_store(&stackshot_exclave_inspect_ctid_count, stackshot_exclave_inspect_ctid_capacity, release);
4464 			}
4465 			if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
4466 				panic("stackshot: trying to inspect already-queued thread");
4467 			}
4468 		}
4469 	}
4470 #endif /* CONFIG_EXCLAVES */
4471 	if (thread_on_core) {
4472 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
4473 	}
4474 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4475 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
4476 	}
4477 
4478 	/* make sure state flags defined in kcdata.h still match internal flags */
4479 	static_assert(SS_TH_WAIT == TH_WAIT);
4480 	static_assert(SS_TH_SUSP == TH_SUSP);
4481 	static_assert(SS_TH_RUN == TH_RUN);
4482 	static_assert(SS_TH_UNINT == TH_UNINT);
4483 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
4484 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
4485 	static_assert(SS_TH_IDLE == TH_IDLE);
4486 
4487 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
4488 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
4489 	cur_thread_snap->ths_state                   = thread->state;
4490 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
4491 	cur_thread_snap->ths_base_priority = thread->base_pri;
4492 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
4493 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
4494 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
4495 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4496 	    thread->requested_policy.thrp_qos_workq_override);
4497 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4498 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
4499 
4500 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4501 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4502 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4503 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4504 
4505 #if STACKSHOT_COLLECTS_LATENCY_INFO
4506 	latency_info.cur_thsnap2_latency = mach_absolute_time() - latency_info.cur_thsnap2_latency;
4507 	latency_info.thread_name_latency = mach_absolute_time();
4508 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4509 
4510 	/* if there is a thread name, add it to the buffer */
4511 	cur_thread_name[0] = '\0';
4512 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
4513 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
4514 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
4515 		kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
4516 	}
4517 
4518 #if STACKSHOT_COLLECTS_LATENCY_INFO
4519 	latency_info.thread_name_latency = mach_absolute_time() - latency_info.thread_name_latency;
4520 	latency_info.sur_times_latency = mach_absolute_time();
4521 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4522 
4523 	/* record system, user, and runnable times */
4524 	time_value_t runnable_time;
4525 	thread_read_times(thread, NULL, NULL, &runnable_time);
4526 	clock_sec_t user_sec = 0, system_sec = 0;
4527 	clock_usec_t user_usec = 0, system_usec = 0;
4528 	absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
4529 	absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
4530 
4531 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
4532 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
4533 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
4534 		.user_usec = user_sec * USEC_PER_SEC + user_usec,
4535 		.system_usec = system_sec * USEC_PER_SEC + system_usec,
4536 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
4537 	};
4538 
4539 #if STACKSHOT_COLLECTS_LATENCY_INFO
4540 	latency_info.sur_times_latency = mach_absolute_time() - latency_info.sur_times_latency;
4541 	latency_info.user_stack_latency = mach_absolute_time();
4542 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4543 
4544 	/* Trace user stack, if any */
4545 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
4546 		uint32_t user_ths_ss_flags = 0;
4547 
4548 		/*
4549 		 * We don't know how big the stacktrace will be, so read it into our
4550 		 * per-cpu buffer, then copy it to the kcdata.
4551 		 */
4552 		struct _stackshot_backtrace_context ctx = {
4553 			.sbc_map = task->map,
4554 			.sbc_allow_faulting = stackshot_ctx.sc_enable_faulting,
4555 			.sbc_prev_page = -1,
4556 			.sbc_prev_kva = -1,
4557 		};
4558 		struct backtrace_control ctl = {
4559 			.btc_user_thread = thread,
4560 			.btc_user_copy = _stackshot_backtrace_copy,
4561 			.btc_user_copy_context = &ctx,
4562 		};
4563 		struct backtrace_user_info info = BTUINFO_INIT;
4564 
4565 		saved_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4566 		    &info);
4567 		if (saved_count > 0) {
4568 #if __LP64__
4569 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
4570 #else // __LP64__
4571 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
4572 #endif // !__LP64__
4573 			/* Now, copy the stacktrace into kcdata. */
4574 			kcd_exit_on_error(kcdata_push_array(kcd, STACKLR_WORDS, sizeof(uintptr_t),
4575 			    saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4576 			if (info.btui_info & BTI_64_BIT) {
4577 				user_ths_ss_flags |= kUser64_p;
4578 			}
4579 			if ((info.btui_info & BTI_TRUNCATED) ||
4580 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
4581 				user_ths_ss_flags |= kThreadTruncatedBT;
4582 				user_ths_ss_flags |= kThreadTruncUserBT;
4583 			}
4584 			user_ths_ss_flags |= ctx.sbc_flags;
4585 			ctx.sbc_flags = 0;
4586 #if __LP64__
4587 			/* We only support async stacks on 64-bit kernels */
4588 			if (info.btui_async_frame_addr != 0) {
4589 				uint32_t async_start_offset = info.btui_async_start_index;
4590 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
4591 				    sizeof(async_start_offset), &async_start_offset));
4592 				ctl.btc_frame_addr = info.btui_async_frame_addr;
4593 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
4594 				info = BTUINFO_INIT;
4595 				unsigned int async_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4596 				    &info);
4597 				if (async_count > 0) {
4598 					kcd_exit_on_error(kcdata_push_array(kcd, STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64,
4599 					    sizeof(uintptr_t), async_count, stackshot_cpu_ctx.scc_stack_buffer));
4600 					if ((info.btui_info & BTI_TRUNCATED) ||
4601 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
4602 						user_ths_ss_flags |= kThreadTruncatedBT;
4603 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
4604 					}
4605 					user_ths_ss_flags |= ctx.sbc_flags;
4606 				}
4607 			}
4608 #endif /* __LP64__ */
4609 		}
4610 		if (user_ths_ss_flags != 0) {
4611 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
4612 		}
4613 	}
4614 
4615 #if STACKSHOT_COLLECTS_LATENCY_INFO
4616 	latency_info.user_stack_latency = mach_absolute_time() - latency_info.user_stack_latency;
4617 	latency_info.kernel_stack_latency = mach_absolute_time();
4618 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4619 
4620 	/* Call through to the machine-specific trace routines.
4621 	 * Frames are added past the snapshot header.
4622 	 */
4623 	if (thread->kernel_stack != 0) {
4624 		uint32_t kern_ths_ss_flags = 0;
4625 #if defined(__LP64__)
4626 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
4627 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
4628 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4629 		saved_count = machine_trace_thread64(
4630 #else
4631 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
4632 		extern int machine_trace_thread(thread_t thread, char *tracepos,
4633 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4634 		saved_count = machine_trace_thread(
4635 #endif
4636 			thread, (char*) stackshot_cpu_ctx.scc_stack_buffer,
4637 			(char *) (stackshot_cpu_ctx.scc_stack_buffer + MAX_FRAMES), MAX_FRAMES,
4638 			&kern_ths_ss_flags);
4639 		if (saved_count > 0) {
4640 			int frame_size = sizeof(uintptr_t);
4641 #if defined(__LP64__)
4642 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
4643 #endif
4644 #if CONFIG_EXCLAVES
4645 			if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
4646 				struct thread_exclaves_info info = { 0 };
4647 
4648 				info.tei_flags = kExclaveRPCActive;
4649 				if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
4650 					info.tei_flags |= kExclaveSchedulerRequest;
4651 				}
4652 				if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
4653 					info.tei_flags |= kExclaveUpcallActive;
4654 				}
4655 				info.tei_scid = thread->th_exclaves_ipc_ctx.scid;
4656 				info.tei_thread_offset = exclaves_stack_offset(stackshot_cpu_ctx.scc_stack_buffer, saved_count / frame_size, false);
4657 
4658 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
4659 			}
4660 #endif /* CONFIG_EXCLAVES */
4661 			kcd_exit_on_error(kcdata_push_array(kcd, stack_kcdata_type,
4662 			    frame_size, saved_count / frame_size, stackshot_cpu_ctx.scc_stack_buffer));
4663 		}
4664 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
4665 			kern_ths_ss_flags |= kThreadTruncKernBT;
4666 		}
4667 		if (kern_ths_ss_flags != 0) {
4668 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
4669 		}
4670 	}
4671 
4672 #if STACKSHOT_COLLECTS_LATENCY_INFO
4673 	latency_info.kernel_stack_latency = mach_absolute_time() - latency_info.kernel_stack_latency;
4674 	latency_info.misc_latency = mach_absolute_time();
4675 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4676 
4677 #if CONFIG_THREAD_GROUPS
4678 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
4679 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
4680 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
4681 		kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
4682 	}
4683 #endif /* CONFIG_THREAD_GROUPS */
4684 
4685 	if (collect_iostats) {
4686 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
4687 	}
4688 
4689 #if CONFIG_PERVASIVE_CPI
4690 	if (collect_instrs_cycles) {
4691 		struct recount_usage usage = { 0 };
4692 		recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
4693 		    &usage);
4694 
4695 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
4696 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
4697 		instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
4698 		instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
4699 	}
4700 #endif /* CONFIG_PERVASIVE_CPI */
4701 
4702 #if STACKSHOT_COLLECTS_LATENCY_INFO
4703 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4704 	if (collect_latency_info) {
4705 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
4706 	}
4707 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4708 
4709 error_exit:
4710 	return error;
4711 }
4712 
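/*
 * Editor's note (not in the original source): the dispatch-queue block above
 * performs a three-step user-memory chase, each step via stackshot_copyin_word
 * and abandoned on the first failure:
 *
 *   1. dqkeyaddr = thread_dispatchqaddr(thread)  -- the TSD slot holding the
 *      thread's dispatch queue pointer;
 *   2. *dqkeyaddr -> dqaddr                      -- the queue object itself;
 *   3. *(dqaddr + serialno offset) -> dqserialnum; and, if a label offset is
 *      known, *(dqaddr + label offset) yields a pointer whose string is then
 *      copied with stackshot_copyin_string.
 */
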
4713 static int
4714 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
4715 {
4716 	cur_thread_snap->tds_thread_id = thread_tid(thread);
4717 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4718 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4719 	} else {
4720 		cur_thread_snap->tds_voucher_identifier = 0;
4721 	}
4722 
4723 	cur_thread_snap->tds_ss_flags = 0;
4724 	if (thread->effective_policy.thep_darwinbg) {
4725 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
4726 	}
4727 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4728 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
4729 	}
4730 	if (thread->suspend_count > 0) {
4731 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
4732 	}
4733 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4734 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
4735 	}
4736 	if (thread_on_core) {
4737 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
4738 	}
4739 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4740 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
4741 	}
4742 
4743 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
4744 	cur_thread_snap->tds_state                   = thread->state;
4745 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
4746 	cur_thread_snap->tds_base_priority           = thread->base_pri;
4747 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
4748 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
4749 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
4750 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
4751 	    thread->requested_policy.thrp_qos_workq_override);
4752 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4753 
4754 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4755 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4756 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4757 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4758 
4759 	return 0;
4760 }
4761 
4762 /*
4763  * Why 12?  12 strikes a decent balance between allocating a large array on
4764  * the stack and having large kcdata item overheads for recording nonrunable
4765  * tasks.
4766  */
4767 #define UNIQUEIDSPERFLUSH 12
4768 
4769 struct saved_uniqueids {
4770 	uint64_t ids[UNIQUEIDSPERFLUSH];
4771 	unsigned count;
4772 };
4773 
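/*
 * Editor's illustration (not in the original source): saved_uniqueids supports
 * batching nonrunnable IDs so they can be flushed as one kcdata array per
 * UNIQUEIDSPERFLUSH entries instead of one item each. A hedged sketch of the
 * intended pattern (example_flush_uniqueids is a hypothetical name, and the
 * kcdata type used by the real code may differ):
 */
#if 0 /* illustrative sketch only -- not compiled */
static kern_return_t
example_flush_uniqueids(kcdata_descriptor_t kcd, struct saved_uniqueids *s)
{
	kern_return_t kr = KERN_SUCCESS;
	if (s->count > 0) {
		kr = kcdata_push_array(kcd, STACKSHOT_KCTYPE_NONRUNNABLE_TIDS,
		    sizeof(uint64_t), s->count, s->ids);
		s->count = 0; /* reset the batch after a flush */
	}
	return kr;
}
#endif
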
4774 enum thread_classification {
4775 	tc_full_snapshot,  /* take a full snapshot */
4776 	tc_delta_snapshot, /* take a delta snapshot */
4777 };
4778 
4779 static enum thread_classification
4780 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
4781 {
4782 	processor_t last_processor = thread->last_processor;
4783 
4784 	boolean_t thread_on_core = FALSE;
4785 	if (last_processor != PROCESSOR_NULL) {
4786 		/* Idle threads are always treated as on-core, since the processor state can change while they are running. */
4787 		thread_on_core = (thread == last_processor->idle_thread) ||
4788 		    (last_processor->state == PROCESSOR_RUNNING &&
4789 		    last_processor->active_thread == thread);
4790 	}
4791 
4792 	*thread_on_core_p = thread_on_core;
4793 
4794 	/* Capture the full thread snapshot if this is not a delta stackshot, or if
4795 	 * the thread has run since the previous full stackshot. */
4796 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stackshot_args.since_timestamp)) {
4797 		return tc_full_snapshot;
4798 	} else {
4799 		return tc_delta_snapshot;
4800 	}
4801 }
4802 
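/*
 * Editor's illustration (not in the original source): callers pair the
 * classification with the on-core flag, as kdp_stackshot_record_task does
 * below. example_record_one_thread is a hypothetical wrapper.
 */
#if 0 /* illustrative sketch only -- not compiled */
static void
example_record_one_thread(thread_t thread, boolean_t collect_delta)
{
	boolean_t on_core = FALSE;
	switch (classify_thread(thread, &on_core, collect_delta)) {
	case tc_full_snapshot:
		/* open a THREAD container and record a full snapshot */
		break;
	case tc_delta_snapshot:
		/* append a compact thread_delta_snapshot_v3 entry */
		break;
	}
}
#endif
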
4803 
4804 static kern_return_t
4805 kdp_stackshot_record_task(task_t task)
4806 {
4807 	boolean_t active_kthreads_only_p  = ((stackshot_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4808 	boolean_t collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4809 	boolean_t save_owner_info         = ((stackshot_flags & STACKSHOT_THREAD_WAITINFO) != 0);
4810 	boolean_t include_drivers         = ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
4811 
4812 	kern_return_t error = KERN_SUCCESS;
4813 	mach_vm_address_t out_addr = 0;
4814 	int saved_count = 0;
4815 
4816 	int task_pid                   = 0;
4817 	uint64_t task_uniqueid         = 0;
4818 	int num_delta_thread_snapshots = 0;
4819 	int num_waitinfo_threads       = 0;
4820 	int num_turnstileinfo_threads  = 0;
4821 
4822 	uint64_t task_start_abstime    = 0;
4823 	boolean_t have_map = FALSE, have_pmap = FALSE;
4824 	boolean_t some_thread_ran = FALSE;
4825 	unaligned_u64 task_snap_ss_flags = 0;
4826 #if STACKSHOT_COLLECTS_LATENCY_INFO
4827 	struct stackshot_latency_task latency_info;
4828 	latency_info.setup_latency = mach_absolute_time();
4829 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4830 
4831 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4832 	uint64_t task_begin_cpu_cycle_count = 0;
4833 	if (!stackshot_ctx.sc_panic_stackshot) {
4834 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
4835 	}
4836 #endif
4837 
4838 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
4839 		error = KERN_FAILURE;
4840 		goto error_exit;
4841 	}
4842 
4843 	void *bsd_info = get_bsdtask_info(task);
4844 	boolean_t task_in_teardown        = (bsd_info == NULL) || proc_in_teardown(bsd_info); // has P_LPEXIT set during proc_exit()
4845 	boolean_t task_in_transition      = task_in_teardown;         // other transition types can be added here
4846 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
4847 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
4848 	/* Task just exec'd and this is the old task */
4849 	bool      task_is_exec_transit    = task_did_exec_internal(task) || task_is_exec_copy_internal(task);
4850 
4851 	if (task_in_transition) {
4852 		collect_delta_stackshot = FALSE;
4853 	}
4854 
4855 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
4856 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
4857 
4858 	task_pid = pid_from_task(task);
4859 	/* Is returning -1 ok for a terminating task? */
4860 	task_uniqueid = get_task_uniqueid(task);
4861 
4862 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
4863 		/*
4864 		 * Not interested in terminated tasks without threads.
4865 		 */
4866 		if (queue_empty(&task->threads) || task_pid == -1) {
4867 			return KERN_SUCCESS;
4868 		}
4869 	}
4870 
4871 	/* All PIDs should have the MSB unset */
4872 	assert((task_pid & (1ULL << 31)) == 0);
4873 
4874 #if STACKSHOT_COLLECTS_LATENCY_INFO
4875 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
4876 	latency_info.task_uniqueid = task_uniqueid;
4877 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4878 
4879 	/* Trace everything, unless a process was specified. Add in driver tasks if requested. */
4880 	if ((stackshot_args.pid == -1) ||
4881 	    ((stackshot_args.pid == task_pid) && !task_is_exec_transit) ||
4882 	    (include_drivers && task_is_driver(task))) {
4883 #if STACKSHOT_COLLECTS_LATENCY_INFO
4884 		stackshot_cpu_latency.tasks_processed++;
4885 #endif
4886 
4887 		/* add task snapshot marker */
4888 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4889 		    container_type, task_uniqueid));
4890 
4891 		if (collect_delta_stackshot) {
4892 			/*
4893 			 * For delta stackshots we need to know if a thread from this task has run since the
4894 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
4895 			 */
4896 			thread_t thread = THREAD_NULL;
4897 			queue_iterate(&task->threads, thread, thread_t, task_threads)
4898 			{
4899 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4900 					error = KERN_FAILURE;
4901 					goto error_exit;
4902 				}
4903 
4904 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
4905 					continue;
4906 				}
4907 
4908 				boolean_t thread_on_core;
4909 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4910 
4911 				switch (thread_classification) {
4912 				case tc_full_snapshot:
4913 					some_thread_ran = TRUE;
4914 					break;
4915 				case tc_delta_snapshot:
4916 					num_delta_thread_snapshots++;
4917 					break;
4918 				}
4919 			}
4920 		}
4921 
4922 		if (collect_delta_stackshot) {
4923 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
4924 		}
4925 
4926 		/* Next record any relevant UUID info and store the task snapshot */
4927 		if (task_in_transition ||
4928 		    !collect_delta_stackshot ||
4929 		    (task_start_abstime == 0) ||
4930 		    (task_start_abstime > stackshot_args.since_timestamp) ||
4931 		    some_thread_ran) {
4932 			/*
4933 			 * Collect full task information in these scenarios:
4934 			 *
4935 			 * 1) a full stackshot or the task is in transition
4936 			 * 2) a delta stackshot where the task started after the previous full stackshot
4937 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
4938 			 *
4939 			 * because the task may have exec'ed, changing its name, architecture, load info, etc.
4940 			 */
4941 
4942 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
4943 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, stackshot_flags, have_pmap, &task_snap_ss_flags));
4944 			kcd_exit_on_error(kcdata_record_task_exec_meta(stackshot_kcdata_p, task));
4945 #if STACKSHOT_COLLECTS_LATENCY_INFO
4946 			if (!task_in_transition) {
4947 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags, &latency_info));
4948 			} else {
4949 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4950 			}
4951 #else
4952 			if (!task_in_transition) {
4953 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4954 			} else {
4955 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4956 			}
4957 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4958 		} else {
4959 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4960 		}
4961 
4962 #if STACKSHOT_COLLECTS_LATENCY_INFO
4963 		latency_info.misc_latency = mach_absolute_time();
4964 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4965 
4966 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
4967 		int current_delta_snapshot_index                  = 0;
4968 		if (num_delta_thread_snapshots > 0) {
4969 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
4970 			    sizeof(struct thread_delta_snapshot_v3),
4971 			    num_delta_thread_snapshots, &out_addr));
4972 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
4973 		}
4974 
4975 
4976 #if STACKSHOT_COLLECTS_LATENCY_INFO
4977 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
4978 #endif
4979 		/*
4980 		 * Iterate over the task threads to save thread snapshots and determine
4981 		 * how much space we need for waitinfo and turnstile info
4982 		 */
4983 		thread_t thread = THREAD_NULL;
4984 		queue_iterate(&task->threads, thread, thread_t, task_threads)
4985 		{
4986 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4987 				error = KERN_FAILURE;
4988 				goto error_exit;
4989 			}
4990 
4991 			uint64_t thread_uniqueid;
4992 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
4993 				continue;
4994 			}
4995 			thread_uniqueid = thread_tid(thread);
4996 
4997 			boolean_t thread_on_core;
4998 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4999 
5000 #if STACKSHOT_COLLECTS_LATENCY_INFO
5001 			stackshot_cpu_latency.threads_processed++;
5002 #endif
5003 
5004 			switch (thread_classification) {
5005 			case tc_full_snapshot:
5006 				/* add thread marker */
5007 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5008 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
5009 
5010 				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
5011 				kcdata_compression_window_open(stackshot_kcdata_p);
5012 
5013 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, stackshot_flags, have_pmap, thread_on_core));
5014 
5015 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5016 
5017 				/* mark end of thread snapshot data */
5018 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5019 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
5020 				break;
5021 			case tc_delta_snapshot:
5022 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
5023 				break;
5024 			}
5025 
5026 			/*
5027 			 * We want to report owner information regardless of whether a thread
5028 			 * has changed since the last delta, whether it's a normal stackshot,
5029 			 * or whether it's nonrunnable
5030 			 */
5031 			if (save_owner_info) {
5032 				if (stackshot_thread_has_valid_waitinfo(thread)) {
5033 					num_waitinfo_threads++;
5034 				}
5035 
5036 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
5037 					num_turnstileinfo_threads++;
5038 				}
5039 			}
5040 		}
5041 #if STACKSHOT_COLLECTS_LATENCY_INFO
5042 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
5043 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5044 
5045 		thread_waitinfo_v2_t *thread_waitinfo           = NULL;
5046 		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
5047 		int current_waitinfo_index              = 0;
5048 		int current_turnstileinfo_index         = 0;
5049 		/* allocate space for the wait and turnstile info */
5050 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
5051 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
5052 			kcdata_compression_window_open(stackshot_kcdata_p);
5053 
5054 			if (num_waitinfo_threads > 0) {
5055 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
5056 				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
5057 				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
5058 			}
5059 
5060 			if (num_turnstileinfo_threads > 0) {
5061 				/* get space for the turnstile info */
5062 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
5063 				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
5064 				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
5065 			}
5066 
5067 			stackshot_plh_resetgen();  // so we know which portlabel_ids are referenced
5068 		}
5069 
5070 #if STACKSHOT_COLLECTS_LATENCY_INFO
5071 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
5072 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
5073 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5074 
5075 		/* Iterate over the task's threads to save the wait and turnstile info */
5076 		queue_iterate(&task->threads, thread, thread_t, task_threads)
5077 		{
5078 			uint64_t thread_uniqueid;
5079 			#pragma unused(thread_uniqueid)
5080 
5081 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
5082 				continue;
5083 			}
5084 
5085 			thread_uniqueid = thread_tid(thread);
5086 
5087 			/* If we want owner info, we should capture it regardless of its classification */
5088 			if (save_owner_info) {
5089 				if (stackshot_thread_has_valid_waitinfo(thread)) {
5090 					stackshot_thread_wait_owner_info(
5091 						thread,
5092 						&thread_waitinfo[current_waitinfo_index++]);
5093 				}
5094 
5095 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
5096 					stackshot_thread_turnstileinfo(
5097 						thread,
5098 						&thread_turnstileinfo[current_turnstileinfo_index++]);
5099 				}
5100 			}
5101 		}
5102 
5103 #if STACKSHOT_COLLECTS_LATENCY_INFO
5104 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
5105 		latency_info.misc2_latency = mach_absolute_time();
5106 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5107 
5108 #if DEBUG || DEVELOPMENT
5109 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
5110 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
5111 			    num_delta_thread_snapshots, current_delta_snapshot_index);
5112 		}
5113 		if (current_waitinfo_index != num_waitinfo_threads) {
5114 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
5115 			    num_waitinfo_threads, current_waitinfo_index);
5116 		}
5117 #endif
5118 
5119 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
5120 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5121 			// now, record the portlabel hashes.
5122 			kcd_exit_on_error(kdp_stackshot_plh_record());
5123 		}
5124 
5125 #if IMPORTANCE_INHERITANCE
5126 		/* Ensure the buffer is big enough, since we're using the stack buffer for this. */
5127 		static_assert(TASK_IMP_WALK_LIMIT * sizeof(int32_t) <= MAX_FRAMES * sizeof(uintptr_t));
5128 		saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
5129 		    (char*) stackshot_cpu_ctx.scc_stack_buffer, TASK_IMP_WALK_LIMIT);
5130 		if (saved_count > 0) {
5131 			/* Variable size array - better not have it on the stack. */
5132 			kcdata_compression_window_open(stackshot_kcdata_p);
5133 			kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
5134 			    sizeof(int32_t), saved_count, stackshot_cpu_ctx.scc_stack_buffer));
5135 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5136 		}
5137 #endif
5138 
5139 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5140 		if (!stackshot_ctx.sc_panic_stackshot) {
5141 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
5142 			    "task_cpu_cycle_count"));
5143 		}
5144 #endif
5145 
5146 #if STACKSHOT_COLLECTS_LATENCY_INFO
5147 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
5148 		if (collect_latency_info) {
5149 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
5150 		}
5151 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5152 
5153 		/* mark end of task snapshot data */
5154 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
5155 		    task_uniqueid));
5156 	}
5157 
5158 
5159 error_exit:
5160 	return error;
5161 }
5162 
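/*
 * Editor's note (not in the original source): the kcdata emitted by
 * kdp_stackshot_record_task nests as follows --
 *
 *   CONTAINER_BEGIN(TASK or TRANSITIONING_TASK, task_uniqueid)
 *     shared cache info, UUID info, task (or task delta) snapshot, ...
 *     CONTAINER_BEGIN(THREAD, tid) ... CONTAINER_END(THREAD, tid)  (per full-
 *     snapshot thread)
 *     THREAD_DELTA_SNAPSHOT array, waitinfo/turnstile arrays, ...
 *   CONTAINER_END(..., task_uniqueid)
 */
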
5163 /* Record global shared regions */
5164 static kern_return_t
5165 kdp_stackshot_shared_regions(uint64_t trace_flags)
5166 {
5167 	kern_return_t error        = KERN_SUCCESS;
5168 
5169 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5170 	extern queue_head_t vm_shared_region_queue;
5171 	vm_shared_region_t sr;
5172 
5174 	queue_iterate(&vm_shared_region_queue,
5175 	    sr,
5176 	    vm_shared_region_t,
5177 	    sr_q) {
5178 		struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
5179 		if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
5180 			break;
5181 		}
5182 		if (collect_delta_stackshot && sr->sr_install_time < stackshot_args.since_timestamp) {
5183 			continue; // only include new shared caches in delta stackshots
5184 		}
5185 		uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
5186 		    (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
5187 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5188 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
5189 		kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
5190 		scinfo.sharedCacheSlide = sr->sr_slide;
5191 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
5192 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5193 		scinfo.sharedCacheID = sr->sr_id;
5194 		scinfo.sharedCacheFlags = sharedCacheFlags;
5195 
5196 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
5197 		    sizeof(scinfo), &scinfo));
5198 
5199 		if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
5200 		    _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
5201 			assert(sr->sr_images_count != 0);
5202 			kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5203 		}
5204 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5205 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
5206 	}
5207 
5208 	/*
5209 	 * For backwards compatibility; this will eventually be removed.
5210 	 * Another copy of the Primary System Shared Region, for older readers.
5211 	 */
5212 	sr = primary_system_shared_region;
5213 	/* record system level shared cache load info (if available) */
5214 	if (!collect_delta_stackshot && sr &&
5215 	    _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
5216 		struct dyld_shared_cache_loadinfo scinfo = {0};
5217 
5218 		/*
5219 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
5220 		 * naming of both the structure and fields for this use isn't great.  The
5221 		 * dyld_shared_cache_loadinfo structure has better names, but the same
5222 		 * layout and content as the original.
5223 		 *
5224 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
5225 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
5226 		 * entries; here, it's the slid base address, and we leave it that way
5227 		 * for backwards compatibility.
5228 		 */
5229 		kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
5230 		scinfo.sharedCacheSlide = sr->sr_slide;
5231 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
5232 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5233 
5234 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
5235 		    sizeof(scinfo), &scinfo));
5236 
5237 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
5238 			/*
5239 			 * Include a map of the system shared cache layout if it has been populated
5240 			 * (which is only when the system is using a custom shared cache).
5241 			 */
5242 			if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
5243 			    (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
5244 				assert(sr->sr_images_count != 0);
5245 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5246 			}
5247 		}
5248 	}
5249 
5250 error_exit:
5251 	return error;
5252 }
5253 
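/*
 * Editor's illustration (not in the original source): a reader can un-slide an
 * address captured inside a recorded shared region using the fields pushed
 * above. example_unslide is a hypothetical helper.
 */
#if 0 /* illustrative sketch only -- not compiled */
static uint64_t
example_unslide(uint64_t slid_addr, const struct dyld_shared_cache_loadinfo_v2 *info)
{
	/* sharedCacheSlide is the ASLR slide applied when the cache was mapped */
	return slid_addr - info->sharedCacheSlide;
}
#endif
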
5254 static kern_return_t
5255 kdp_stackshot_kcdata_format(void)
5256 {
5257 	kern_return_t error        = KERN_SUCCESS;
5258 	mach_vm_address_t out_addr = 0;
5259 	uint64_t abs_time = 0;
5260 	uint64_t system_state_flags = 0;
5261 	task_t task = TASK_NULL;
5262 	mach_timebase_info_data_t timebase = {0, 0};
5263 	uint32_t length_to_copy = 0, tmp32 = 0;
5264 	abs_time = mach_absolute_time();
5265 	uint64_t last_task_start_time = 0;
5266 	int cur_workitem_index = 0;
5267 	uint64_t tasks_in_stackshot = 0;
5268 	uint64_t threads_in_stackshot = 0;
5269 
5270 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5271 	uint64_t stackshot_begin_cpu_cycle_count = 0;
5272 
5273 	if (!stackshot_ctx.sc_panic_stackshot) {
5274 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5275 	}
5276 #endif
5277 
5278 	/* the CPU entering here is participating in the stackshot */
5279 	stackshot_cpu_ctx.scc_did_work = true;
5280 
5281 #if STACKSHOT_COLLECTS_LATENCY_INFO
5282 	collect_latency_info = !(stackshot_flags & STACKSHOT_DISABLE_LATENCY_INFO);
5283 #endif
5284 	/* process the flags */
5285 	bool collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5286 	bool collect_exclaves        = !disable_exclave_stackshot && ((stackshot_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
5287 	stackshot_ctx.sc_enable_faulting = (stackshot_flags & (STACKSHOT_ENABLE_BT_FAULTING));
5288 
5289 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
5290 	kc_format_t primary_kc_type = KCFormatUnknown;
5291 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
5292 		stackshot_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
5293 	}
5294 
5295 	if (sizeof(void *) == 8) {
5296 		system_state_flags |= kKernel64_p;
5297 	}
5298 
5299 #if CONFIG_EXCLAVES
5300 	if (!stackshot_ctx.sc_panic_stackshot && collect_exclaves) {
5301 		kcd_exit_on_error(stackshot_setup_exclave_waitlist()); /* Allocate list of exclave threads */
5302 	}
5303 #else
5304 #pragma unused(collect_exclaves)
5305 #endif /* CONFIG_EXCLAVES */
5306 
5307 	/* Set up mach_absolute_time and timebase info; it is copied out in some cases, and is needed to convert since_timestamp to seconds for proc start times. */
5308 	clock_timebase_info(&timebase);
5309 
5310 	/* begin saving data into the buffer */
5311 	if (stackshot_ctx.sc_bytes_uncompressed) {
5312 		stackshot_ctx.sc_bytes_uncompressed = 0;
5313 	}
5314 
5315 	/*
5316 	 * Setup pre-task linked kcdata buffer.
5317 	 * The idea here is that we want the kcdata to be in (roughly) the same order as it was
5318 	 * before we made this multithreaded, so we have separate buffers for pre and post task-iteration,
5319 	 * since that's the parallelized part.
5320 	 */
5321 	if (!stackshot_ctx.sc_is_singlethreaded) {
5322 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5323 		stackshot_ctx.sc_pretask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5324 	}
5325 
5326 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_flags, "stackshot_in_flags"));
5327 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)stackshot_args.pid, "stackshot_in_pid"));
5328 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
5329 	if (stackshot_flags & STACKSHOT_PAGE_TABLES) {
5330 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_args.pagetable_mask, "stackshot_pagetable_mask"));
5331 	}
5332 	if (stackshot_initial_estimate != 0) {
5333 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
5334 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
5335 	}
5336 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_available_task_exec_flags(), "stackshot_te_flags_mask"));
5337 
5338 
5339 #if STACKSHOT_COLLECTS_LATENCY_INFO
5340 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time();
5341 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5342 
5343 #if CONFIG_JETSAM
5344 	tmp32 = memorystatus_get_pressure_status_kdp();
5345 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
5346 #endif
5347 
5348 	if (!collect_delta_stackshot) {
5349 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
5350 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
5351 
5352 		tmp32 = PAGE_SIZE;
5353 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
5354 
5355 		/* save boot-args and osversion string */
5356 		length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
5357 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
5358 		length_to_copy = MIN((uint32_t)(strlen(osversion) + 1), OSVERSIZE);
5359 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OS_BUILD_VERSION, length_to_copy, (void *)osversion));
5360 
5361 
5362 		length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
5363 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
5364 
5365 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
5366 	} else {
5367 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stackshot_args.since_timestamp));
5368 	}
5369 
5370 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
5371 
5372 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_ctx.sc_microsecs));
5373 
5374 	kcd_exit_on_error(kdp_stackshot_shared_regions(stackshot_flags));
5375 
5376 	/* Add requested information first */
5377 	if (stackshot_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
5378 		struct mem_and_io_snapshot mais = {0};
5379 		kdp_mem_and_io_snapshot(&mais);
5380 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
5381 	}
5382 
5383 #if HAS_MTE
5384 	if (stackshot_flags & STACKSHOT_MTEINFO) {
5385 		kcd_exit_on_error(stackshot_mteinfo_snapshot(stackshot_kcdata_p));
5386 	}
5387 #endif
5388 
5389 #if CONFIG_THREAD_GROUPS
5390 	struct thread_group_snapshot_v3 *thread_groups = NULL;
5391 	int num_thread_groups = 0;
5392 
5393 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5394 	uint64_t thread_group_begin_cpu_cycle_count = 0;
5395 
5396 	if (!stackshot_ctx.sc_is_singlethreaded && (stackshot_flags & STACKSHOT_THREAD_GROUP)) {
5397 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5398 	}
5399 #endif
5400 
5401 	/* Iterate over thread group names */
5402 	if (stackshot_flags & STACKSHOT_THREAD_GROUP) {
5403 		/* Variable size array - better not have it on the stack. */
5404 		kcdata_compression_window_open(stackshot_kcdata_p);
5405 
5406 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
5407 			stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5408 		}
5409 
5410 		if (num_thread_groups > 0) {
5411 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
5412 			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
5413 		}
5414 
5415 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
5416 			error = KERN_FAILURE;
5417 			goto error_exit;
5418 		}
5419 
5420 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5421 	}
5422 
5423 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5424 	if (!stackshot_ctx.sc_panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
5425 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
5426 		    "thread_groups_cpu_cycle_count"));
5427 	}
5428 #endif
5429 #else
5430 	stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5431 #endif /* CONFIG_THREAD_GROUPS */
5432 
5433 
5434 #if STACKSHOT_COLLECTS_LATENCY_INFO
5435 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.setup_latency_mt;
5436 	if (stackshot_ctx.sc_is_singlethreaded) {
5437 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time();
5438 	} else {
5439 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time();
5440 	}
5441 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5442 
5443 	bool const process_scoped = (stackshot_args.pid != -1) &&
5444 	    ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
5445 
5446 	/* Iterate over tasks */
5447 	queue_iterate(&tasks, task, task_t, tasks)
5448 	{
5449 		stackshot_panic_guard();
5450 
5451 		if (collect_delta_stackshot) {
5452 			uint64_t abstime;
5453 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
5454 
5455 			if (abstime > last_task_start_time) {
5456 				last_task_start_time = abstime;
5457 			}
5458 		}
5459 
5460 		pid_t task_pid = pid_from_task(task);
5461 
5462 		if (process_scoped && (task_pid != stackshot_args.pid)) {
5463 			continue;
5464 		}
5465 
5466 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5467 		    (!queue_empty(&task->threads) && task_pid != -1)) {
5468 			tasks_in_stackshot++;
5469 			threads_in_stackshot += task->thread_count;
5470 		}
5471 
5472 		/* If this is a singlethreaded stackshot, don't use the work queues. */
5473 		if (stackshot_ctx.sc_is_singlethreaded) {
5474 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5475 		} else {
5476 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5477 				.sswi_task = task,
5478 				.sswi_data = NULL,
5479 				.sswi_idx = cur_workitem_index++
5480 			}));
5481 		}
5482 
5483 		if (process_scoped) {
5484 			/* Only targeting one process, we're done now. */
5485 			break;
5486 		}
5487 	}
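	/*
	 * For example, a stackshot scoped to pid 1 (without
	 * STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) records just that one
	 * task and breaks out of the loop above, while pid -1 walks every
	 * task on the queue.
	 */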
5488 
5489 #if STACKSHOT_COLLECTS_LATENCY_INFO
5490 	if (stackshot_ctx.sc_is_singlethreaded) {
5491 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_task_iteration_latency_mt;
5492 	} else {
5493 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.task_queue_building_latency_mt;
5494 	}
5495 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5496 
5497 	/* Setup post-task kcdata buffer */
5498 	if (!stackshot_ctx.sc_is_singlethreaded) {
5499 		stackshot_finalize_linked_kcdata();
5500 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5501 		stackshot_ctx.sc_posttask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5502 	}
5503 
5504 #if CONFIG_COALITIONS
5505 	/* In delta stackshots, only collect jetsam coalition snapshots if a task has started since the delta timestamp - otherwise they haven't changed */
5506 	if (!collect_delta_stackshot || (last_task_start_time > stackshot_args.since_timestamp)) {
5507 		int num_coalitions = 0;
5508 		struct jetsam_coalition_snapshot *coalitions = NULL;
5509 
5510 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5511 		uint64_t coalition_begin_cpu_cycle_count = 0;
5512 
5513 		if (!stackshot_ctx.sc_panic_stackshot && (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
5514 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5515 		}
5516 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5517 
5518 		/* Iterate over coalitions */
5519 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5520 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5521 				stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5522 			}
5523 		}
5524 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5525 			if (num_coalitions > 0) {
5526 				/* Variable size array - better not have it on the stack. */
5527 				kcdata_compression_window_open(stackshot_kcdata_p);
5528 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
5529 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
5530 
5531 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5532 					error = KERN_FAILURE;
5533 					goto error_exit;
5534 				}
5535 
5536 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5537 			}
5538 		}
5539 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5540 		if (!stackshot_ctx.sc_panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
5541 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
5542 			    "coalitions_cpu_cycle_count"));
5543 		}
5544 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5545 	}
5546 #else
5547 	stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5548 #endif /* CONFIG_COALITIONS */
5549 
5550 	stackshot_panic_guard();
5551 
5552 #if STACKSHOT_COLLECTS_LATENCY_INFO
5553 	if (stackshot_ctx.sc_is_singlethreaded) {
5554 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time();
5555 	} else {
5556 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time();
5557 	}
5558 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5559 
5560 	/*
5561 	 * Iterate over the tasks in the terminated tasks list. We only inspect
5562 	 * tasks that have a valid bsd_info pointer. Checks for task transitions,
5563 	 * such as being past P_LPEXIT during proc_exit(), now happen inside
5564 	 * kdp_stackshot_record_task(), where a safer and minimal
5565 	 * transitioning_task_snapshot struct is collected via
5566 	 * kcdata_record_transitioning_task_snapshot().
5567 	 */
5568 	queue_iterate(&terminated_tasks, task, task_t, tasks)
5569 	{
5570 		stackshot_panic_guard();
5571 
5572 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5573 		    (!queue_empty(&task->threads) && pid_from_task(task) != -1)) {
5574 			tasks_in_stackshot++;
5575 			threads_in_stackshot += task->thread_count;
5576 		}
5577 
5578 		/* Only use workqueues on non-panic and non-scoped stackshots. */
5579 		if (stackshot_ctx.sc_is_singlethreaded) {
5580 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5581 		} else {
5582 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5583 				.sswi_task = task,
5584 				.sswi_data = NULL,
5585 				.sswi_idx = cur_workitem_index++
5586 			}));
5587 		}
5588 	}
5589 
5590 	/* Mark the queue(s) as populated. */
5591 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5592 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_populated, true, release);
5593 	}
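	/*
	 * This release store pairs with the acquire loads of sswq_populated
	 * in stackshot_cpu_work_on_queue(); once a worker sees the flag set
	 * and the queue empty, it knows no further items will arrive.
	 */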
5594 
5595 #if DEVELOPMENT || DEBUG
5596 	kcd_exit_on_error(kdp_stackshot_plh_stats());
5597 #endif /* DEVELOPMENT || DEBUG */
5598 
5599 #if STACKSHOT_COLLECTS_LATENCY_INFO
5600 	if (stackshot_ctx.sc_is_singlethreaded) {
5601 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt;
5602 	} else {
5603 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt;
5604 	}
5605 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5606 
5607 #if STACKSHOT_COLLECTS_LATENCY_INFO
5608 	if (collect_latency_info) {
5609 		stackshot_ctx.sc_latency.latency_version = 2;
5610 		stackshot_ctx.sc_latency.main_cpu_number = stackshot_ctx.sc_main_cpuid;
5611 		stackshot_ctx.sc_latency.calling_cpu_number = stackshot_ctx.sc_calling_cpuid;
5612 	}
5613 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5614 
5615 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5616 	if (!stackshot_ctx.sc_panic_stackshot) {
5617 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
5618 		    "stackshot_total_cpu_cycle_cnt"));
5619 	}
5620 #endif
5621 
5622 	kcdata_add_uint64_with_description(stackshot_kcdata_p, tasks_in_stackshot, "stackshot_tasks_count");
5623 	kcdata_add_uint64_with_description(stackshot_kcdata_p, threads_in_stackshot, "stackshot_threads_count");
5624 
5625 	stackshot_panic_guard();
5626 
5627 	if (!stackshot_ctx.sc_is_singlethreaded) {
5628 		/* Chip away at the queue. */
5629 		stackshot_finalize_linked_kcdata();
5630 		stackshot_cpu_do_work();
5631 		*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_tail->kcdata;
5632 	}
5633 
5634 #if CONFIG_EXCLAVES
5635 	/* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
5636 	if (stackshot_ctx.sc_panic_stackshot) {
5637 		struct exclaves_panic_stackshot excl_ss;
5638 		kdp_read_panic_exclaves_stackshot(&excl_ss);
5639 
5640 		if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
5641 			tb_error_t tberr = TB_ERROR_SUCCESS;
5642 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
5643 
5644 			/* this block does not escape, so this is okay... */
5645 			kern_return_t *error_in_block = &error;
5646 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5647 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5648 			tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
5649 				*error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, false);
5650 			});
5651 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5652 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5653 			if (tberr != TB_ERROR_SUCCESS) {
5654 				exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
5655 			}
5656 		} else {
5657 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
5658 		}
5659 
5660 		/* check error from the block */
5661 		kcd_exit_on_error(error);
5662 	}
5663 #endif
5664 
5665 	/*  === END of populating stackshot data === */
5666 error_exit:;
5667 	if (error != KERN_SUCCESS) {
5668 		stackshot_set_error(error);
5669 	}
5670 
5671 	stackshot_panic_guard();
5672 
5673 	return error;
5674 }
5675 
5676 static uint64_t
5677 proc_was_throttled_from_task(task_t task)
5678 {
5679 	uint64_t was_throttled = 0;
5680 	void *bsd_info = get_bsdtask_info(task);
5681 
5682 	if (bsd_info) {
5683 		was_throttled = proc_was_throttled(bsd_info);
5684 	}
5685 
5686 	return was_throttled;
5687 }
5688 
5689 static uint64_t
5690 proc_did_throttle_from_task(task_t task)
5691 {
5692 	uint64_t did_throttle = 0;
5693 	void *bsd_info = get_bsdtask_info(task);
5694 
5695 	if (bsd_info) {
5696 		did_throttle = proc_did_throttle(bsd_info);
5697 	}
5698 
5699 	return did_throttle;
5700 }
5701 
5702 static void
5703 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
5704 {
5705 	unsigned int pages_reclaimed;
5706 	unsigned int pages_wanted;
5707 	kern_return_t kErr;
5708 
5709 	uint64_t compressions = 0;
5710 	uint64_t decompressions = 0;
5711 
5712 	compressions = counter_load(&vm_statistics_compressions);
5713 	decompressions = counter_load(&vm_statistics_decompressions);
5714 
5715 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
5716 	memio_snap->free_pages = vm_page_free_count;
5717 	memio_snap->active_pages = vm_page_active_count;
5718 	memio_snap->inactive_pages = vm_page_inactive_count;
5719 	memio_snap->purgeable_pages = vm_page_purgeable_count;
5720 	memio_snap->wired_pages = vm_page_wire_count;
5721 	memio_snap->speculative_pages = vm_page_speculative_count;
5722 	memio_snap->throttled_pages = vm_page_throttled_count;
5723 	memio_snap->busy_buffer_count = count_busy_buffers();
5724 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
5725 	memio_snap->compressions = (uint32_t)compressions;
5726 	memio_snap->decompressions = (uint32_t)decompressions;
5727 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
5728 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
5729 
5730 	if (!kErr) {
5731 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
5732 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
5733 		memio_snap->pages_wanted_reclaimed_valid = 1;
5734 	} else {
5735 		memio_snap->pages_wanted = 0;
5736 		memio_snap->pages_reclaimed = 0;
5737 		memio_snap->pages_wanted_reclaimed_valid = 0;
5738 	}
5739 }
5740 
5741 #if HAS_MTE
5742 static kern_return_t
5743 stackshot_mteinfo_snapshot(kcdata_descriptor_t data)
5744 {
5745 	kern_return_t error = KERN_SUCCESS;
5746 	mach_vm_address_t out_addr = 0;
5747 
5748 	kcdata_compression_window_open(stackshot_kcdata_p);
5749 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(data, STACKSHOT_KCTYPE_MTEINFO_CELL, sizeof(struct mte_info_cell), mte_tag_storage_count, &out_addr));
5750 
5751 	kdp_mteinfo_snapshot((struct mte_info_cell*)out_addr, mte_tag_storage_count);
5752 
5753 	kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5754 
5755 error_exit:
5756 	return error;
5757 }
5758 #endif
5759 
5760 static vm_offset_t
5761 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
5762 {
5763 	vm_offset_t result;
5764 	struct kdp_fault_result fault_results = {0};
5765 	if (stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting) {
5766 		fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
5767 	}
5768 	if (!stackshot_ctx.sc_panic_stackshot) {
5769 		fault_flags |= KDP_FAULT_FLAGS_MULTICPU;
5770 	}
5771 
5772 	result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
5773 
5774 	if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
5775 		stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
5776 
5777 #if STACKSHOT_COLLECTS_LATENCY_INFO
5778 		stackshot_cpu_latency.faulting_time_mt += fault_results.time_spent_faulting;
5779 #endif
5780 
5781 		if ((stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting >= stackshot_max_fault_time) && !stackshot_ctx.sc_panic_stackshot) {
5782 			stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
5783 		}
5784 	}
5785 
5786 	if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
5787 		stackshot_cpu_ctx.scc_fault_stats.sfs_pages_faulted_in++;
5788 	}
5789 
5790 	if (kdp_fault_result_flags) {
5791 		*kdp_fault_result_flags = fault_results.flags;
5792 	}
5793 
5794 	return result;
5795 }
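/*
 * The net effect is a per-CPU fault-time budget: outside of panic
 * stackshots, once this CPU has spent stackshot_max_fault_time faulting,
 * KDP_FAULT_FLAGS_ENABLE_FAULTING is stripped from every subsequent
 * lookup and only already-resident pages will resolve.
 */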
5796 
5797 /*
5798  * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
5799  * in order to:
5800  *   1. collect statistics on the number of pages faulted in
5801  *   2. stop faulting if the time spent faulting has exceeded the limit.
5802  */
5803 static boolean_t
5804 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5805 {
5806 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5807 	if (try_fault) {
5808 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5809 	}
5810 	return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5811 }
5812 static boolean_t
5813 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5814 {
5815 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5816 	if (try_fault) {
5817 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5818 	}
5819 	return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5820 }
5821 static int
5822 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5823 {
5824 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5825 	if (try_fault) {
5826 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5827 	}
5828 	return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
5829 }
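/*
 * Typical use of these wrappers, with a hypothetical user address uaddr
 * in the target task's map:
 *
 *	uint64_t word = 0;
 *	kdp_fault_result_flags_t fault_flags = 0;
 *	if (stackshot_copyin_word(task, uaddr, &word, TRUE, &fault_flags)) {
 *		// word was copied; fault_flags records whether a fault
 *		// was attempted or satisfied along the way
 *	}
 *
 * A FALSE return means only that this copy failed (perhaps because the
 * fault budget was exhausted), not that the stackshot as a whole failed.
 */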
5830 
5831 kern_return_t
5832 do_stackshot(void *context)
5833 {
5834 #pragma unused(context)
5835 	kern_return_t error;
5836 	size_t queue_size;
5837 	uint64_t abs_time = mach_absolute_time(), abs_time_end = 0;
5838 	kdp_snapshot++;
5839 
5840 	if (!stackshot_ctx.sc_is_singlethreaded) {
5841 #if defined(__arm64__)
5842 		/*
5843 		 * Set up buffers. We used the ssb_size field of each buffer entry
5844 		 * to indicate how many CPUs in that cluster are participating in the
5845 		 * stackshot, so that we can divvy up buffer space accordingly.
5846 		 */
5847 		size_t buf_per_cpu = stackshot_args.buffer_size / os_atomic_load(&stackshot_ctx.sc_cpus_working, relaxed);
5848 		buf_per_cpu -= buf_per_cpu % sizeof(uint64_t); /* align to uint64_t */
5849 		mach_vm_address_t cur_addr = (mach_vm_address_t) stackshot_args.buffer;
5850 		for (int buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
5851 			size_t bufsz = buf_per_cpu * stackshot_ctx.sc_buffers[buf_idx].ssb_size;
5852 			if (bufsz == 0) {
5853 				continue;
5854 			}
5855 			stackshot_ctx.sc_buffers[buf_idx] = (struct stackshot_buffer) {
5856 				.ssb_ptr = (void*) cur_addr,
5857 				.ssb_size = bufsz,
5858 				.ssb_used = 0,
5859 				.ssb_freelist = NULL,
5860 				.ssb_freelist_lock = 0,
5861 				.ssb_overhead = 0
5862 			};
5863 			cur_addr += bufsz;
5864 		}
5865 		assert(cur_addr <= ((mach_vm_address_t) stackshot_args.buffer + stackshot_args.buffer_size));
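		/*
		 * For instance, with a hypothetical 1 MiB buffer and 4
		 * participating CPUs, buf_per_cpu is 262144 bytes (already
		 * uint64_t-aligned); a cluster whose ssb_size counted 2
		 * participating CPUs gets a 524288-byte slice, and cur_addr
		 * advances past each slice in turn.
		 */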
5866 #else /* __arm64__ */
5867 		/*
5868 		 * On Intel, we always just have one buffer
5869 		 */
5870 		stackshot_ctx.sc_buffers[0] = (struct stackshot_buffer) {
5871 			.ssb_ptr = stackshot_args.buffer,
5872 			.ssb_size = stackshot_args.buffer_size,
5873 			.ssb_used = 0,
5874 			.ssb_freelist = NULL,
5875 			.ssb_freelist_lock = 0,
5876 			.ssb_overhead = 0
5877 		};
5878 #endif /* !__arm64__ */
5879 
5880 		/* Set up queues. These numbers shouldn't change, but slightly fudge queue size just in case. */
5881 		queue_size = FUDGED_SIZE(tasks_count + terminated_tasks_count, 10);
5882 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5883 			stackshot_ctx.sc_workqueues[i] = (struct stackshot_workqueue) {
5884 				.sswq_items     = stackshot_alloc_arr(struct stackshot_workitem, queue_size, &error),
5885 				.sswq_capacity  = queue_size,
5886 				.sswq_num_items = 0,
5887 				.sswq_cur_item  = 0,
5888 				.sswq_populated = false
5889 			};
5890 			if (error != KERN_SUCCESS) {
5891 				break;
5892 			}
5893 		}
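		/*
		 * The slack from FUDGED_SIZE (assumed here to scale the task
		 * count by its second argument) leaves headroom in case a task
		 * is created between reading the counts and filling the queues.
		 */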
5894 	}
5895 
5896 	_stackshot_validation_reset();
5897 	error = stackshot_plh_setup(); /* set up port label hash */
5898 
5899 	if (error != KERN_SUCCESS) {
5900 		stackshot_set_error(error);
5901 		return error;
5902 	}
5903 
5904 	/*
5905 	 * If no main CPU has been selected at this point (every CPU has
5906 	 * called stackshot_cpu_preflight by now), then no CLPC-recommended
5907 	 * P-core was available. In that case, we volunteer ourselves
5908 	 * to be the main CPU, because someone has to do it.
5909 	 */
5910 	if (stackshot_ctx.sc_main_cpuid == -1) {
5911 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, cpu_number(), acquire);
5912 		stackshot_cpu_ctx.scc_can_work = true;
5913 	}
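	/*
	 * The cmpxchg succeeds only for a CPU that still observes -1, so at
	 * most one volunteer wins even if several CPUs race through here.
	 */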
5914 
5915 	/* After this, auxiliary CPUs can begin work. */
5916 	os_atomic_store(&stackshot_ctx.sc_state, SS_RUNNING, release);
5917 
5918 	/* If we are the main CPU, populate the queues / do other main CPU work. */
5919 	if (stackshot_ctx.sc_panic_stackshot || (stackshot_ctx.sc_main_cpuid == cpu_number())) {
5920 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5921 	} else if (stackshot_cpu_ctx.scc_can_work) {
5922 		stackshot_cpu_do_work();
5923 	}
5924 
5925 	/* Wait for every CPU to finish. */
5926 #if STACKSHOT_COLLECTS_LATENCY_INFO
5927 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time();
5928 #endif
5929 	if (stackshot_cpu_ctx.scc_can_work) {
5930 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, seq_cst);
5931 		stackshot_cpu_ctx.scc_can_work = false;
5932 	}
5933 	while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
5934 		loop_wait();
5935 	}
5936 	stackshot_panic_guard();
5937 #if STACKSHOT_COLLECTS_LATENCY_INFO
5938 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.cpu_wait_latency_mt;
5939 #endif
5940 
5941 	/* update timestamp of the stackshot */
5942 	abs_time_end = mach_absolute_time();
5943 	stackshot_ctx.sc_duration = (struct stackshot_duration_v2) {
5944 		.stackshot_duration       = (abs_time_end - abs_time),
5945 		.stackshot_duration_outer = 0,
5946 		.stackshot_duration_prior = stackshot_duration_prior_abs,
5947 	};
5948 
5949 	stackshot_plh_reset();
5950 
5951 	/* Check interrupts disabled time. */
5952 #if SCHED_HYGIENE_DEBUG
5953 	bool disable_interrupts_masked_check = kern_feature_override(
5954 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
5955 	    (stackshot_flags & STACKSHOT_DO_COMPRESS) != 0;
5956 
5957 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
5958 	disable_interrupts_masked_check = true;
5959 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
5960 
5961 	if (disable_interrupts_masked_check) {
5962 		ml_spin_debug_clear_self();
5963 	}
5964 
5965 	if (!stackshot_ctx.sc_panic_stackshot && interrupt_masked_debug_mode) {
5966 		/*
5967 		 * Try to catch instances where stackshot takes too long BEFORE returning from
5968 		 * the debugger
5969 		 */
5970 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
5971 	}
5972 #endif /* SCHED_HYGIENE_DEBUG */
5973 
5974 	kdp_snapshot--;
5975 
5976 	/* If any other CPU had an error, make sure we return it */
5977 	if (stackshot_ctx.sc_retval == KERN_SUCCESS) {
5978 		stackshot_ctx.sc_retval = stackshot_status_check();
5979 	}
5980 
5981 #if CONFIG_EXCLAVES
5982 	/* Avoid setting AST until as late as possible, in case the stackshot fails */
5983 	if (!stackshot_ctx.sc_panic_stackshot && stackshot_ctx.sc_retval == KERN_SUCCESS) {
5984 		commit_exclaves_ast();
5985 	}
5986 	if (stackshot_ctx.sc_retval != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
5987 		/* Clear inspection CTID list: no need to wait for these threads */
5988 		stackshot_cleanup_exclave_waitlist();
5989 	}
5990 #endif
5991 
5992 	/* If this is a singlethreaded stackshot, the "final" kcdata buffer is just our CPU's kcdata buffer */
5993 	if (stackshot_ctx.sc_is_singlethreaded) {
5994 		stackshot_ctx.sc_finalized_kcdata = stackshot_kcdata_p;
5995 	}
5996 
5997 	return stackshot_ctx.sc_retval;
5998 }
5999 
6000 kern_return_t
6001 do_panic_stackshot(void *context)
6002 {
6003 	kern_return_t ret = do_stackshot(context);
6004 	if (ret != KERN_SUCCESS) {
6005 		goto out;
6006 	}
6007 
6008 	ret = stackshot_finalize_singlethreaded_kcdata();
6009 
6010 out:
6011 	return ret;
6012 }
6013 
6014 /*
6015  * Set up needed state for this CPU before participating in a stackshot.
6016  * Namely, we want to signal that we're available to do work.
6017  * Called while interrupts are disabled & in the debugger trap.
6018  */
6019 void
6020 stackshot_cpu_preflight(void)
6021 {
6022 	bool is_recommended, is_calling_cpu;
6023 	int my_cpu_no = cpu_number();
6024 
6025 #if STACKSHOT_COLLECTS_LATENCY_INFO
6026 	stackshot_cpu_latency = (typeof(stackshot_cpu_latency)) {
6027 		.cpu_number            =  cpu_number(),
6028 #if defined(__AMP__)
6029 		.cluster_type          =  current_cpu_datap()->cpu_cluster_type,
6030 #else /* __AMP__ */
6031 		.cluster_type = CLUSTER_TYPE_SMP,
6032 #endif /* __AMP__ */
6033 		.faulting_time_mt      = 0,
6034 		.total_buf             = 0,
6035 		.intercluster_buf_used = 0
6036 	};
6037 #if CONFIG_PERVASIVE_CPI
6038 	mt_cur_cpu_cycles_instrs_speculative(&stackshot_cpu_latency.total_cycles, &stackshot_cpu_latency.total_instrs);
6039 #endif /* CONFIG_PERVASIVE_CPI */
6040 	stackshot_cpu_latency.init_latency_mt = stackshot_cpu_latency.total_latency_mt = mach_absolute_time();
6041 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
6042 
6043 	is_recommended = current_processor()->is_recommended;
6044 
6045 	/* If this is a recommended P-core (or SMP), try making it the main CPU */
6046 	if (is_recommended
6047 #if defined(__AMP__)
6048 	    && current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_P
6049 #endif /* __AMP__ */
6050 	    ) {
6051 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, my_cpu_no, acquire);
6052 	}
6053 
6054 	is_calling_cpu = stackshot_ctx.sc_calling_cpuid == my_cpu_no;
6055 
6056 	stackshot_cpu_ctx.scc_did_work = false;
6057 	stackshot_cpu_ctx.scc_can_work = is_calling_cpu || (is_recommended && !stackshot_ctx.sc_is_singlethreaded);
6058 
6059 	if (stackshot_cpu_ctx.scc_can_work) {
6060 		/*
6061 		 * Increase size of our cluster's buffer to indicate how many CPUs in this
6062 		 * cluster are participating
6063 		 */
6064 #if defined(__arm64__)
6065 		os_atomic_inc(&stackshot_ctx.sc_buffers[cpu_cluster_id()].ssb_size, relaxed);
6066 #endif /* __arm64__ */
6067 		os_atomic_inc(&stackshot_ctx.sc_cpus_working, seq_cst);
6068 	}
6069 }
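/*
 * Note that at this point ssb_size is still a CPU count rather than a
 * byte count - e.g. a cluster with three participating CPUs has
 * ssb_size == 3 - and do_stackshot() later rescales it into bytes when
 * carving up the caller's buffer.
 */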
6070 
6071 __result_use_check
6072 static kern_return_t
6073 stackshot_cpu_work_on_queue(struct stackshot_workqueue *queue)
6074 {
6075 	struct stackshot_workitem     *cur_workitemp;
6076 	kern_return_t                  error = KERN_SUCCESS;
6077 
6078 	while (((cur_workitemp = stackshot_get_workitem(queue)) != NULL || !os_atomic_load(&queue->sswq_populated, acquire))) {
6079 		/* Check to make sure someone hasn't errored out or panicked. */
6080 		if (__improbable(stackshot_status_check() != KERN_SUCCESS)) {
6081 			return KERN_ABORTED;
6082 		}
6083 
6084 		if (cur_workitemp) {
6085 			kcd_exit_on_error(stackshot_new_linked_kcdata());
6086 			cur_workitemp->sswi_data = stackshot_cpu_ctx.scc_kcdata_head;
6087 			kcd_exit_on_error(kdp_stackshot_record_task(cur_workitemp->sswi_task));
6088 			stackshot_finalize_linked_kcdata();
6089 		} else {
6090 #if STACKSHOT_COLLECTS_LATENCY_INFO
6091 			uint64_t time_begin = mach_absolute_time();
6092 #endif
6093 			loop_wait();
6094 #if STACKSHOT_COLLECTS_LATENCY_INFO
6095 			stackshot_cpu_latency.workqueue_latency_mt += mach_absolute_time() - time_begin;
6096 #endif
6097 		}
6098 	}
6099 
6100 error_exit:
6101 	return error;
6102 }
6103 
6104 static void
6105 stackshot_cpu_do_work(void)
6106 {
6107 	kern_return_t                  error;
6108 
6109 	stackshot_cpu_ctx.scc_stack_buffer = stackshot_alloc_arr(uintptr_t, MAX_FRAMES, &error);
6110 	if (error != KERN_SUCCESS) {
6111 		goto error_exit;
6112 	}
6113 
6114 #if STACKSHOT_COLLECTS_LATENCY_INFO
6115 	stackshot_cpu_latency.init_latency_mt = mach_absolute_time() - stackshot_cpu_latency.init_latency_mt;
6116 #endif
6117 
6118 	bool high_perf = true;
6119 
6120 #if defined(__AMP__)
6121 	if (current_cpu_datap()->cpu_cluster_type != CLUSTER_TYPE_P) {
6122 		high_perf = false;
6123 	}
6124 #endif /* __AMP__ */
6125 
6126 	if (high_perf) {
6127 		/* High Perf: Work from most difficult to least difficult */
6128 		for (size_t i = STACKSHOT_NUM_WORKQUEUES; i > 0; i--) {
6129 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i - 1]));
6130 		}
6131 	} else {
6132 		/* Low Perf: Work from least difficult to most difficult */
6133 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
6134 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i]));
6135 		}
6136 	}
6137 #if STACKSHOT_COLLECTS_LATENCY_INFO
6138 	stackshot_cpu_latency.total_latency_mt = mach_absolute_time() - stackshot_cpu_latency.total_latency_mt;
6139 #if CONFIG_PERVASIVE_CPI
6140 	uint64_t cycles, instrs;
6141 	mt_cur_cpu_cycles_instrs_speculative(&cycles, &instrs);
6142 	stackshot_cpu_latency.total_cycles = cycles - stackshot_cpu_latency.total_cycles;
6143 	stackshot_cpu_latency.total_instrs = instrs - stackshot_cpu_latency.total_instrs;
6144 #endif /* CONFIG_PERVASIVE_CPI */
6145 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
6146 
6147 error_exit:
6148 	if (error != KERN_SUCCESS) {
6149 		stackshot_set_error(error);
6150 	}
6151 	stackshot_panic_guard();
6152 }
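/*
 * With three workqueues, for example, a P-core drains them in the order
 * 2, 1, 0 while an E-core drains 0, 1, 2: the cheapest items go to the
 * slower cores first, keeping the most expensive work on the fastest
 * CPUs.
 */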
6153 
6154 /*
6155  * This is where the other CPUs will end up when we take a stackshot.
6156  * If they're available to do work, they'll do so here.
6157  * Called with interrupts disabled & from the debugger trap.
6158  */
6159 void
6160 stackshot_aux_cpu_entry(void)
6161 {
6162 	/*
6163 	 * Note that the main CPU will also call this in the middle of its
6164 	 * own work to chip away at the queue, in addition to the auxiliary
6165 	 * CPUs that land here from the debugger trap.
6166 	 */
6167 
6168 	/* Don't do work if we said we couldn't... */
6169 	if (!stackshot_cpu_ctx.scc_can_work) {
6170 		return;
6171 	}
6172 
6173 	/* Spin until we're ready to run. */
6174 	while (os_atomic_load(&stackshot_ctx.sc_state, acquire) == SS_SETUP) {
6175 		loop_wait();
6176 	}
6177 
6178 	/* Check to make sure the setup didn't error out or panic. */
6179 	if (stackshot_status_check() != KERN_SUCCESS) {
6180 		goto exit;
6181 	}
6182 
6183 	/* the CPU entering here is participating in the stackshot */
6184 	stackshot_cpu_ctx.scc_did_work = true;
6185 
6186 	if (stackshot_ctx.sc_main_cpuid == cpu_number()) {
6187 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
6188 	} else {
6189 		stackshot_cpu_do_work();
6190 	}
6191 
6192 exit:
6193 	os_atomic_dec(&stackshot_ctx.sc_cpus_working, release);
6194 }
6195 
6196 boolean_t
6197 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
6198 {
6199 	/* When the pthread kext puts a worker thread to sleep, it will
6200 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
6201 	 * struct. See parkit() in kern/kern_support.c in libpthread.
6202 	 */
6203 	return (thread->state & TH_WAIT) &&
6204 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
6205 }
6206 
6207 #if CONFIG_COALITIONS
6208 static void
6209 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
6210 {
6211 #pragma unused(i, coal)
6212 	unsigned int *coalition_count = (unsigned int*)arg;
6213 	(*coalition_count)++;
6214 }
6215 
6216 static void
6217 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
6218 {
6219 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
6220 		return;
6221 	}
6222 
6223 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
6224 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
6225 	task_t leader = TASK_NULL;
6226 	jcs->jcs_id = coalition_id(coal);
6227 	jcs->jcs_flags = 0;
6228 	jcs->jcs_thread_group = 0;
6229 
6230 	if (coalition_term_requested(coal)) {
6231 		jcs->jcs_flags |= kCoalitionTermRequested;
6232 	}
6233 	if (coalition_is_terminated(coal)) {
6234 		jcs->jcs_flags |= kCoalitionTerminated;
6235 	}
6236 	if (coalition_is_reaped(coal)) {
6237 		jcs->jcs_flags |= kCoalitionReaped;
6238 	}
6239 	if (coalition_is_privileged(coal)) {
6240 		jcs->jcs_flags |= kCoalitionPrivileged;
6241 	}
6242 
6243 #if CONFIG_THREAD_GROUPS
6244 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
6245 	if (thread_group) {
6246 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
6247 	}
6248 #endif /* CONFIG_THREAD_GROUPS */
6249 
6250 	leader = kdp_coalition_get_leader(coal);
6251 	if (leader) {
6252 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
6253 	} else {
6254 		jcs->jcs_leader_task_uniqueid = 0;
6255 	}
6256 }
6257 #endif /* CONFIG_COALITIONS */
6258 
6259 #if CONFIG_THREAD_GROUPS
6260 static void
6261 stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
6262 {
6263 #pragma unused(i, tg)
6264 	unsigned int *n = (unsigned int*)arg;
6265 	(*n)++;
6266 }
6267 
6268 static void
6269 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
6270 {
6271 	struct thread_group_snapshot_v3 *thread_groups = arg;
6272 	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
6273 	const char *name = thread_group_get_name(tg);
6274 	uint32_t flags = thread_group_get_flags(tg);
6275 	tgs->tgs_id = thread_group_get_id(tg);
6276 	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
6277 	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
6278 	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
6279 	    sizeof(tgs->tgs_name_cont));
6280 	tgs->tgs_flags =
6281 	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT)     ? kThreadGroupEfficient     : 0) |
6282 	    ((flags & THREAD_GROUP_FLAGS_APPLICATION)   ? kThreadGroupApplication   : 0) |
6283 	    ((flags & THREAD_GROUP_FLAGS_CRITICAL)      ? kThreadGroupCritical      : 0) |
6284 	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT)   ? kThreadGroupBestEffort    : 0) |
6285 	    ((flags & THREAD_GROUP_FLAGS_UI_APP)        ? kThreadGroupUIApplication : 0) |
6286 	    ((flags & THREAD_GROUP_FLAGS_MANAGED)       ? kThreadGroupManaged       : 0) |
6287 	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers  : 0) |
6288 	    0;
6289 }
6290 #endif /* CONFIG_THREAD_GROUPS */
6291 
6292 /* Determine if a thread has waitinfo that stackshot can provide */
6293 static int
6294 stackshot_thread_has_valid_waitinfo(thread_t thread)
6295 {
6296 	if (!(thread->state & TH_WAIT)) {
6297 		return 0;
6298 	}
6299 
6300 	switch (thread->block_hint) {
6301 	// If set to None or is a parked work queue, ignore it
6302 	case kThreadWaitParkedWorkQueue:
6303 	case kThreadWaitNone:
6304 		return 0;
6305 	// There is a short window where the pthread kext removes a thread
6306 	// from its ksyn wait queue before waking the thread up
6307 	case kThreadWaitPThreadMutex:
6308 	case kThreadWaitPThreadRWLockRead:
6309 	case kThreadWaitPThreadRWLockWrite:
6310 	case kThreadWaitPThreadCondVar:
6311 		return kdp_pthread_get_thread_kwq(thread) != NULL;
6312 	// All other cases are valid block hints if in a wait state
6313 	default:
6314 		return 1;
6315 	}
6316 }
6317 
6318 /* Determine if a thread has turnstileinfo that stackshot can provide */
6319 static int
6320 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
6321 {
6322 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
6323 
6324 	return stackshot_thread_has_valid_waitinfo(thread) &&
6325 	       ts != TURNSTILE_NULL;
6326 }
6327 
6328 static void
6329 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
6330 {
6331 	struct turnstile *ts;
6332 	struct ipc_service_port_label *ispl = NULL;
6333 
6334 	/* acquire turnstile information and store it in the stackshot */
6335 	ts = thread_get_waiting_turnstile(thread);
6336 	tsinfo->waiter = thread_tid(thread);
6337 	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
6338 	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
6339 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
6340 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
6341 	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
6342 }
6343 
6344 static void
6345 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
6346 {
6347 	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
6348 	struct ipc_service_port_label *ispl = NULL;
6349 
6350 	waitinfo->waiter        = thread_tid(thread);
6351 	waitinfo->wait_type     = thread->block_hint;
6352 	waitinfo->wait_flags    = 0;
6353 
6354 	switch (waitinfo->wait_type) {
6355 	case kThreadWaitKernelMutex:
6356 		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6357 		break;
6358 	case kThreadWaitPortReceive:
6359 		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6360 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
6361 		break;
6362 	case kThreadWaitPortSend:
6363 		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6364 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
6365 		break;
6366 	case kThreadWaitSemaphore:
6367 		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6368 		break;
6369 	case kThreadWaitUserLock:
6370 		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6371 		break;
6372 	case kThreadWaitKernelRWLockRead:
6373 	case kThreadWaitKernelRWLockWrite:
6374 	case kThreadWaitKernelRWLockUpgrade:
6375 		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6376 		break;
6377 	case kThreadWaitPThreadMutex:
6378 	case kThreadWaitPThreadRWLockRead:
6379 	case kThreadWaitPThreadRWLockWrite:
6380 	case kThreadWaitPThreadCondVar:
6381 		kdp_pthread_find_owner(thread, waitinfo_v1);
6382 		break;
6383 	case kThreadWaitWorkloopSyncWait:
6384 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
6385 		break;
6386 	case kThreadWaitOnProcess:
6387 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
6388 		break;
6389 	case kThreadWaitSleepWithInheritor:
6390 		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6391 		break;
6392 	case kThreadWaitEventlink:
6393 		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6394 		break;
6395 	case kThreadWaitCompressor:
6396 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
6397 		break;
6398 #ifdef CONFIG_EXCLAVES
6399 	case kThreadWaitExclaveCore:
6400 	case kThreadWaitExclaveKit:
6401 		kdp_esync_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6402 		break;
6403 #endif /* CONFIG_EXCLAVES */
6404 	case kThreadWaitPageBusy:
6405 		kdp_vm_page_sleep_find_owner(thread->wait_event, waitinfo_v1);
6406 		break;
6407 	case kThreadWaitPagingInProgress:
6408 	case kThreadWaitPagingActivity:
6409 	case kThreadWaitPagerInit:
6410 	case kThreadWaitPagerReady:
6411 	case kThreadWaitMemoryBlocked:
6412 	case kThreadWaitPageInThrottle:
6413 		kdp_vm_object_sleep_find_owner(thread->wait_event, waitinfo->wait_type, waitinfo_v1);
6414 		break;
6415 	default:
6416 		waitinfo->owner = 0;
6417 		waitinfo->context = 0;
6418 		break;
6419 	}
6420 }
6421