xref: /xnu-11215.1.10/osfmk/kern/kern_stackshot.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 #include <mach/mach_types.h>
31 #include <mach/vm_param.h>
32 #include <mach/mach_vm.h>
33 #include <mach/clock_types.h>
34 #include <sys/code_signing.h>
35 #include <sys/errno.h>
36 #include <sys/stackshot.h>
37 #if defined(__arm64__)
38 #include <arm/cpu_internal.h>
39 #endif /* __arm64__ */
40 #ifdef IMPORTANCE_INHERITANCE
41 #include <ipc/ipc_importance.h>
42 #endif
43 #include <sys/appleapiopts.h>
44 #include <kern/debug.h>
45 #include <kern/block_hint.h>
46 #include <uuid/uuid.h>
47 
48 #include <kdp/kdp_dyld.h>
49 #include <kdp/kdp_en_debugger.h>
50 #include <kdp/processor_core.h>
51 #include <kdp/kdp_common.h>
52 
53 #include <libsa/types.h>
54 #include <libkern/version.h>
55 #include <libkern/section_keywords.h>
56 
57 #include <string.h> /* bcopy */
58 
59 #include <kern/kern_stackshot.h>
60 #include <kern/backtrace.h>
61 #include <kern/coalition.h>
62 #include <kern/exclaves_stackshot.h>
63 #include <kern/exclaves_inspection.h>
64 #include <kern/processor.h>
65 #include <kern/host_statistics.h>
66 #include <kern/counter.h>
67 #include <kern/thread.h>
68 #include <kern/thread_group.h>
69 #include <kern/task.h>
70 #include <kern/telemetry.h>
71 #include <kern/clock.h>
72 #include <kern/policy_internal.h>
73 #include <kern/socd_client.h>
74 #include <kern/startup.h>
75 #include <vm/vm_map_xnu.h>
76 #include <vm/vm_kern_xnu.h>
77 #include <vm/vm_pageout.h>
78 #include <vm/vm_fault.h>
79 #include <vm/vm_shared_region_xnu.h>
80 #include <vm/vm_compressor_xnu.h>
81 #include <libkern/OSKextLibPrivate.h>
82 #include <os/log.h>
83 
84 #ifdef CONFIG_EXCLAVES
85 #include <kern/exclaves.tightbeam.h>
86 #endif /* CONFIG_EXCLAVES */
87 
88 #include <kern/exclaves_test_stackshot.h>
89 
90 #if defined(__x86_64__)
91 #include <i386/mp.h>
92 #include <i386/cpu_threads.h>
93 #endif
94 
95 #include <pexpert/pexpert.h>
96 
97 #if CONFIG_PERVASIVE_CPI
98 #include <kern/monotonic.h>
99 #endif /* CONFIG_PERVASIVE_CPI */
100 
101 #include <san/kasan.h>
102 
103 #if DEBUG || DEVELOPMENT
104 #define STACKSHOT_COLLECTS_DIAGNOSTICS 1
105 #define STACKSHOT_COLLECTS_LATENCY_INFO 1
106 #else
107 #define STACKSHOT_COLLECTS_DIAGNOSTICS 0
108 #define STACKSHOT_COLLECTS_LATENCY_INFO 0
109 #endif /* DEBUG || DEVELOPMENT */
110 
111 #if defined(__AMP__)
112 #define STACKSHOT_NUM_WORKQUEUES 2
113 #else /* __AMP__ */
114 #define STACKSHOT_NUM_WORKQUEUES 1
115 #endif
116 
117 #if defined(__arm64__)
118 #define STACKSHOT_NUM_BUFFERS MAX_CPU_CLUSTERS
119 #else /* __arm64__ */
120 #define STACKSHOT_NUM_BUFFERS 1
121 #endif /* __arm64__ */
122 
123 /* The number of threads which will land a task in the hardest workqueue. */
124 #define STACKSHOT_HARDEST_THREADCOUNT 10
125 
126 TUNABLE_DEV_WRITEABLE(unsigned int, stackshot_single_thread, "stackshot_single_thread", 0);
127 
128 extern unsigned int not_in_kdp;
129 
130 /* indicate to the compiler that some accesses are unaligned */
131 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
132 
133 int kdp_snapshot                            = 0;
134 
135 #pragma mark ---Stackshot Struct Definitions---
136 
137 typedef struct linked_kcdata_descriptor {
138 	struct kcdata_descriptor          kcdata;
139 	struct linked_kcdata_descriptor  *next;
140 } * linked_kcdata_descriptor_t;
141 
142 struct stackshot_workitem {
143 	task_t                        sswi_task;
144 	linked_kcdata_descriptor_t    sswi_data; /* The kcdata for this task. */
145 	int                           sswi_idx;  /* The index of this job, used for ordering kcdata across multiple queues. */
146 };
147 
148 struct stackshot_workqueue {
149 	uint32_t _Atomic              sswq_num_items; /* Only modified by main CPU */
150 	uint32_t _Atomic              sswq_cur_item; /* Modified by all CPUs */
151 	size_t                        sswq_capacity; /* Constant after preflight */
152 	bool _Atomic                  sswq_populated; /* Only modified by main CPU */
153 	struct stackshot_workitem    *__counted_by(capacity) sswq_items;
154 };
155 
156 struct freelist_entry {
157 	struct freelist_entry        *fl_next; /* Next entry in the freelist */
158 	size_t                        fl_size; /* Size of the entry (must be >= sizeof(struct freelist_entry)) */
159 };
160 
161 struct stackshot_buffer {
162 	void                         *ssb_ptr; /* Base of buffer */
163 	size_t                        ssb_size;
164 	size_t _Atomic                ssb_used;
165 	struct freelist_entry        *ssb_freelist; /* First freelist entry */
166 	int _Atomic                   ssb_freelist_lock;
167 	size_t _Atomic                ssb_overhead; /* Total amount ever freed (even if re-allocated from freelist) */
168 };
169 
170 struct kdp_snapshot_args {
171 	int                           pid;
172 	void                         *buffer;
173 	struct kcdata_descriptor     *descriptor;
174 	uint32_t                      buffer_size;
175 	uint64_t                      flags;
176 	uint64_t                      since_timestamp;
177 	uint32_t                      pagetable_mask;
178 };
179 
180 /*
181  * Keep a simple cache of the most recent validation done at a page granularity
182  * to avoid the expensive software KVA-to-phys translation in the VM.
183  */
184 
185 struct _stackshot_validation_state {
186 	vm_offset_t last_valid_page_kva;
187 	size_t last_valid_size;
188 };
189 
190 /* CPU-local generation counts for PLH */
191 struct _stackshot_plh_gen_state {
192 	uint8_t                *pgs_gen;       /* last 'gen #' seen in */
193 	int16_t                 pgs_curgen_min; /* min idx seen for this gen */
194 	int16_t                 pgs_curgen_max; /* max idx seen for this gen */
195 	uint8_t                 pgs_curgen;     /* current gen */
196 };
197 
198 /*
199  * For port labels, we have a small hash table we use to track the
200  * struct ipc_service_port_label pointers we see along the way.
201  * This structure encapsulates the global state.
202  *
203  * The hash table is insert-only, similar to "intern"ing strings.  It's
204  * only used an manipulated in during the stackshot collection.  We use
205  * seperate chaining, with the hash elements and chains being int16_ts
206  * indexes into the parallel arrays, with -1 ending the chain.  Array indices are
207  * allocated using a bump allocator.
208  *
209  * The parallel arrays contain:
210  *      - plh_array[idx]	the pointer entered
211  *      - plh_chains[idx]	the hash chain
212  *      - plh_gen[idx]		the last 'generation #' seen
213  *
214  * Generation IDs are used to track entries looked up in the current
215  * task; 0 is never used, and the plh_gen array is cleared to 0 on
216  * rollover.
217  *
218  * The portlabel_ids we report externally are just the index in the array,
219  * plus 1 to avoid 0 as a value.  0 is NONE, -1 is UNKNOWN (e.g. there is
220  * one, but we ran out of space)
221  */
222 struct port_label_hash {
223 	int _Atomic             plh_lock;       /* lock for concurrent modifications to this plh */
224 	uint16_t                plh_size;       /* size of allocations; 0 disables tracking */
225 	uint16_t                plh_count;      /* count of used entries in plh_array */
226 	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
227 	int16_t                *plh_chains;    /* _size allocated */
228 	int16_t                *plh_hash;      /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
229 #if DEVELOPMENT || DEBUG
230 	/* statistics */
231 	uint32_t _Atomic        plh_lookups;    /* # lookups or inserts */
232 	uint32_t _Atomic        plh_found;
233 	uint32_t _Atomic        plh_found_depth;
234 	uint32_t _Atomic        plh_insert;
235 	uint32_t _Atomic        plh_insert_depth;
236 	uint32_t _Atomic        plh_bad;
237 	uint32_t _Atomic        plh_bad_depth;
238 	uint32_t _Atomic        plh_lookup_send;
239 	uint32_t _Atomic        plh_lookup_receive;
240 #define PLH_STAT_OP(...)    (void)(__VA_ARGS__)
241 #else /* DEVELOPMENT || DEBUG */
242 #define PLH_STAT_OP(...)    (void)(0)
243 #endif /* DEVELOPMENT || DEBUG */
244 };
245 
246 #define plh_lock(plh) while(!os_atomic_cmpxchg(&(plh)->plh_lock, 0, 1, acquire)) { loop_wait(); }
247 #define plh_unlock(plh) os_atomic_store(&(plh)->plh_lock, 0, release);
248 
249 #define STACKSHOT_PLH_SHIFT    7
250 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
251 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
252 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
253 
254 struct stackshot_cpu_context {
255 	bool                               scc_can_work; /* Whether the CPU can do more stackshot work */
256 	bool                               scc_did_work; /* Whether the CPU actually did any stackshot work */
257 	linked_kcdata_descriptor_t         scc_kcdata_head; /* See `linked_kcdata_alloc_callback */
258 	linked_kcdata_descriptor_t         scc_kcdata_tail; /* See `linked_kcdata_alloc_callback */
259 	uintptr_t                         *scc_stack_buffer; /* A buffer for stacktraces. */
260 	struct stackshot_fault_stats       scc_fault_stats;
261 	struct _stackshot_validation_state scc_validation_state;
262 	struct _stackshot_plh_gen_state    scc_plh_gen;
263 };
264 
265 /*
266  * When directly modifying the stackshot state, always use the macros below to
267  * work with this enum - the higher order bits are used to store an error code
268  * in the case of SS_ERRORED.
269  *
270  *        +------------------------------------+-------------------+
271  *        |                                    |                   |
272  *        v                                    |                   |
273  * +-------------+     +----------+     +------------+     +------------+
274  * | SS_INACTIVE |---->| SS_SETUP |---->| SS_RUNNING |---->| SS_ERRORED |
275  * +-------------+     +----------+     +------------+     +------------+
276  *                         |  |                |                ^  |
277  *                         |  +----------------|----------------+  |
278  * +-------------+         |                   |                   |
279  * | SS_PANICKED |<--------+-------------------+                   |
280  * +-------------+                                                 |
281  *        ^                                                        |
282  *        |                                                        |
283  *        +--------------------------------------------------------+
284  */
285 __enum_closed_decl(stackshot_state_t, uint, {
286 	SS_INACTIVE = 0x0, /* -> SS_SETUP */
287 	SS_SETUP    = 0x1, /* -> SS_RUNNING, SS_ERRORED, SS_PANICKED */
288 	SS_RUNNING  = 0x2, /* -> SS_ERRORED, SS_PANICKED, SS_INACTIVE */
289 	SS_ERRORED  = 0x3, /* -> SS_INACTIVE, SS_PANICKED */
290 	SS_PANICKED = 0x4, /* -> N/A */
291 	_SS_COUNT
292 });
293 
294 static_assert(_SS_COUNT <= 0x5);
295 /* Get the stackshot state ID from a stackshot_state_t. */
296 #define SS_STATE(state) ((state) & 0x7u)
297 /* Get the error code from a stackshot_state_t. */
298 #define SS_ERRCODE(state) ((state) >> 3)
299 /* Make a stackshot error state with a given code. */
300 #define SS_MKERR(code) (((code) << 3) | SS_ERRORED)
301 
302 struct stackshot_context {
303 	/* Constants & Arguments */
304 	struct kdp_snapshot_args      sc_args;
305 	int                           sc_calling_cpuid;
306 	int                           sc_main_cpuid;
307 	bool                          sc_enable_faulting;
308 	uint64_t                      sc_microsecs; /* Timestamp */
309 	bool                          sc_panic_stackshot;
310 	size_t                        sc_min_kcdata_size;
311 	bool                          sc_is_singlethreaded;
312 
313 	/* State & Errors */
314 	stackshot_state_t _Atomic     sc_state; /* Only modified by calling CPU, main CPU, or panicking CPU. See comment above type definition for details. */
315 	kern_return_t                 sc_retval; /* The return value of the main thread */
316 	uint32_t _Atomic              sc_cpus_working;
317 
318 	/* KCData */
319 	linked_kcdata_descriptor_t    sc_pretask_kcdata;
320 	linked_kcdata_descriptor_t    sc_posttask_kcdata;
321 	kcdata_descriptor_t           sc_finalized_kcdata;
322 
323 	/* Buffers & Queues */
324 	struct stackshot_buffer       __counted_by(num_buffers) sc_buffers[STACKSHOT_NUM_BUFFERS];
325 	size_t                        sc_num_buffers;
326 	struct stackshot_workqueue    __counted_by(STACKSHOT_NUM_WORKQUEUES) sc_workqueues[STACKSHOT_NUM_WORKQUEUES];
327 	struct port_label_hash        sc_plh;
328 
329 	/* Statistics */
330 	struct stackshot_duration_v2  sc_duration;
331 	uint32_t                      sc_bytes_traced;
332 	uint32_t                      sc_bytes_uncompressed;
333 #if STACKSHOT_COLLECTS_LATENCY_INFO
334 	struct stackshot_latency_collection_v2 sc_latency;
335 #endif
336 };
337 
338 #define STACKSHOT_DEBUG_TRACEBUF_SIZE 16
339 
340 struct stackshot_trace_entry {
341 	int               sste_line_no;
342 	uint64_t          sste_timestamp;
343 	mach_vm_address_t sste_data;
344 };
345 
346 struct stackshot_trace_buffer {
347 	uint64_t                     sstb_last_trace_timestamp;
348 	size_t                       sstb_tail_idx;
349 	size_t                       sstb_size;
350 	struct stackshot_trace_entry __counted_by(STACKSHOT_DEBUG_TRACEBUF_SIZE) sstb_entries[STACKSHOT_DEBUG_TRACEBUF_SIZE];
351 };
352 
353 #pragma mark ---Stackshot State and Data---
354 
355 /*
356  * Two stackshot states, one for panic and one for normal.
357  * That way, we can take a stackshot during a panic without clobbering state.
358  */
359 #define STACKSHOT_CTX_IDX_NORMAL 0
360 #define STACKSHOT_CTX_IDX_PANIC  1
361 size_t cur_stackshot_ctx_idx   = STACKSHOT_CTX_IDX_NORMAL;
362 struct stackshot_context stackshot_contexts[2] = {{0}, {0}};
363 #define stackshot_ctx (stackshot_contexts[cur_stackshot_ctx_idx])
364 #define stackshot_args (stackshot_ctx.sc_args)
365 #define stackshot_flags (stackshot_args.flags)
366 
367 static struct {
368 	uint64_t last_abs_start;      /* start time of last stackshot */
369 	uint64_t last_abs_end;        /* end time of last stackshot */
370 	uint64_t stackshots_taken;    /* total stackshots taken since boot */
371 	uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
372 } stackshot_stats = { 0 };
373 
374 #if STACKSHOT_COLLECTS_LATENCY_INFO
375 static struct stackshot_latency_cpu PERCPU_DATA(stackshot_cpu_latency_percpu);
376 #define stackshot_cpu_latency (*PERCPU_GET(stackshot_cpu_latency_percpu))
377 #endif
378 
379 static struct stackshot_cpu_context PERCPU_DATA(stackshot_cpu_ctx_percpu);
380 #define stackshot_cpu_ctx (*PERCPU_GET(stackshot_cpu_ctx_percpu))
381 
382 static struct kcdata_descriptor PERCPU_DATA(stackshot_kcdata_percpu);
383 #define stackshot_kcdata_p (PERCPU_GET(stackshot_kcdata_percpu))
384 
385 #if STACKSHOT_COLLECTS_LATENCY_INFO
386 static bool collect_latency_info = true;
387 #endif
388 
389 static uint64_t stackshot_max_fault_time;
390 
391 #if STACKSHOT_COLLECTS_DIAGNOSTICS
392 static struct stackshot_trace_buffer PERCPU_DATA(stackshot_trace_buffer);
393 #endif
394 
395 #pragma mark ---Stackshot Global State---
396 
397 uint32_t stackshot_estimate_adj = 25; /* experiment factor: 0-100, adjust our estimate up by this amount */
398 
399 static uint32_t stackshot_initial_estimate;
400 static uint32_t stackshot_initial_estimate_adj;
401 static uint64_t stackshot_duration_prior_abs;   /* prior attempts, abs */
402 static unaligned_u64 * stackshot_duration_outer;
403 static uint64_t stackshot_tries;
404 
405 void * kernel_stackshot_buf   = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
406 int kernel_stackshot_buf_size = 0;
407 
408 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
409 
410 #if CONFIG_EXCLAVES
411 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
412 static size_t stackshot_exclave_inspect_ctid_count = 0;
413 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
414 
415 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
416 #endif /* CONFIG_EXCLAVES */
417 
418 #if DEBUG || DEVELOPMENT
419 TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
420 #else
421 const bool disable_exclave_stackshot = false;
422 #endif
423 
424 #pragma mark ---Stackshot Static Function Declarations---
425 
426 __private_extern__ void stackshot_init( void );
427 static boolean_t        memory_iszero(void *addr, size_t size);
428 static void             stackshot_cpu_do_work(void);
429 static kern_return_t    stackshot_finalize_kcdata(void);
430 static kern_return_t    stackshot_finalize_singlethreaded_kcdata(void);
431 static kern_return_t    stackshot_collect_kcdata(void);
432 static int              kdp_stackshot_kcdata_format();
433 static void             kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
434 static vm_offset_t      stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
435 static boolean_t        stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
436 static int              stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
437 static boolean_t        stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
438 static uint64_t         proc_was_throttled_from_task(task_t task);
439 static void             stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
440 static int              stackshot_thread_has_valid_waitinfo(thread_t thread);
441 static void             stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
442 static int              stackshot_thread_has_valid_turnstileinfo(thread_t thread);
443 static uint32_t         get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj, uint64_t trace_flags, pid_t target_pid);
444 static kern_return_t    kdp_snapshot_preflight_internal(struct kdp_snapshot_args args);
445 
446 #if CONFIG_COALITIONS
447 static void             stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
448 static void             stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
449 #endif /* CONFIG_COALITIONS */
450 
451 #if CONFIG_THREAD_GROUPS
452 static void             stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
453 static void             stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
454 #endif /* CONFIG_THREAD_GROUPS */
455 
456 extern uint64_t         workqueue_get_task_ss_flags_from_pwq_state_kdp(void *proc);
457 
458 static kcdata_descriptor_t linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size);
459 
460 #pragma mark ---Stackshot Externs---
461 
462 struct proc;
463 extern int              proc_pid(struct proc *p);
464 extern uint64_t         proc_uniqueid(void *p);
465 extern uint64_t         proc_was_throttled(void *p);
466 extern uint64_t         proc_did_throttle(void *p);
467 extern int              proc_exiting(void *p);
468 extern int              proc_in_teardown(void *p);
469 static uint64_t         proc_did_throttle_from_task(task_t task);
470 extern void             proc_name_kdp(struct proc *p, char * buf, int size);
471 extern int              proc_threadname_kdp(void * uth, char * buf, size_t size);
472 extern void             proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
473 extern void             proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
474 extern uint64_t         proc_getcsflags_kdp(void * p);
475 extern boolean_t        proc_binary_uuid_kdp(task_t task, uuid_t uuid);
476 extern int              memorystatus_get_pressure_status_kdp(void);
477 extern void             memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
478 extern void             panic_stackshot_release_lock(void);
479 
480 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
481 
482 #if CONFIG_TELEMETRY
483 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
484 #endif /* CONFIG_TELEMETRY */
485 
486 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
487 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
488 
489 static size_t stackshot_plh_est_size(void);
490 
491 #if CONFIG_EXCLAVES
492 static kern_return_t collect_exclave_threads(uint64_t);
493 static kern_return_t stackshot_setup_exclave_waitlist(void);
494 #endif
495 
496 /*
497  * Validates that the given address for a word is both a valid page and has
498  * default caching attributes for the current map.
499  */
500 bool machine_trace_thread_validate_kva(vm_offset_t);
501 /*
502  * Validates a region that stackshot will potentially inspect.
503  */
504 static bool _stackshot_validate_kva(vm_offset_t, size_t);
505 /*
506  * Must be called whenever stackshot is re-driven.
507  */
508 static void _stackshot_validation_reset(void);
509 /*
510  * A kdp-safe strlen() call.  Returns:
511  *      -1 if we reach maxlen or a bad address before the end of the string, or
512  *      strlen(s)
513  */
514 static long _stackshot_strlen(const char *s, size_t maxlen);
515 
516 #define MAX_FRAMES 1000
517 #define STACKSHOT_PAGETABLE_BUFSZ 4000
518 #define MAX_LOADINFOS 500
519 #define MAX_DYLD_COMPACTINFO (20 * 1024)  // max bytes of compactinfo to include per proc/shared region
520 #define TASK_IMP_WALK_LIMIT 20
521 
522 typedef struct thread_snapshot *thread_snapshot_t;
523 typedef struct task_snapshot *task_snapshot_t;
524 
525 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
526 extern kdp_send_t    kdp_en_send_pkt;
527 #endif
528 
529 /*
530  * Stackshot locking and other defines.
531  */
532 LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
533 LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
534 
535 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
536 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
537 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
538 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
539 
540 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
541 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
542 
543 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
544 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
545 
546 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
547 
548 /*
549  * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
550  * for non-panic stackshots where faulting is requested.
551  */
552 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
553 
554 
555 #ifndef ROUNDUP
556 #define ROUNDUP(x, y)            ((((x)+(y)-1)/(y))*(y))
557 #endif
558 
559 #define STACKSHOT_QUEUE_LABEL_MAXSIZE  64
560 
561 #pragma mark ---Stackshot Useful Macros---
562 
563 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
564 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
565 /*
566  * Use of the kcd_exit_on_error(action) macro requires a local
567  * 'kern_return_t error' variable and 'error_exit' label.
568  */
569 #define kcd_exit_on_error(action)                      \
570 	do {                                               \
571 	    if (KERN_SUCCESS != (error = (action))) {      \
572 	        STACKSHOT_TRACE(error);                    \
573 	        if (error == KERN_RESOURCE_SHORTAGE) {     \
574 	            error = KERN_INSUFFICIENT_BUFFER_SIZE; \
575 	        }                                          \
576 	        goto error_exit;                           \
577 	    }                                              \
578 	} while (0); /* end kcd_exit_on_error */
579 
580 #if defined(__arm64__)
581 #define loop_wait_noguard() __builtin_arm_wfe()
582 #elif defined(__x86_64__)
583 #define loop_wait_noguard() __builtin_ia32_pause()
584 #else
585 #define loop_wait_noguard()
586 #endif /* __x86_64__ */
587 
588 #define loop_wait() { loop_wait_noguard(); stackshot_panic_guard(); }
589 
590 static inline void stackshot_panic_guard(void);
591 
592 static __attribute__((noreturn, noinline)) void
stackshot_panic_spin(void)593 stackshot_panic_spin(void)
594 {
595 	if (stackshot_cpu_ctx.scc_can_work) {
596 		stackshot_cpu_ctx.scc_can_work = false;
597 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
598 	}
599 	if (stackshot_ctx.sc_calling_cpuid == cpu_number()) {
600 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, acquire) != 0) {
601 			loop_wait_noguard();
602 		}
603 		panic_stackshot_release_lock();
604 	}
605 	while (1) {
606 		loop_wait_noguard();
607 	}
608 }
609 
610 /**
611  * Immediately aborts if another CPU panicked during the stackshot.
612  */
613 static inline void
stackshot_panic_guard(void)614 stackshot_panic_guard(void)
615 {
616 	if (__improbable(os_atomic_load(&stackshot_ctx.sc_state, relaxed) == SS_PANICKED)) {
617 		stackshot_panic_spin();
618 	}
619 }
620 
621 /*
622  * Signal that we panicked during a stackshot by setting an atomic flag and
623  * waiting for others to coalesce before continuing the panic. Other CPUs will
624  * spin on this as soon as they see it set in order to prevent multiple
625  * concurrent panics. The calling CPU (i.e. the one holding the debugger lock)
626  * will release it for us in `stackshot_panic_spin` so we can continue
627  * panicking.
628  *
629  * This is called from panic_trap_to_debugger.
630  */
631 void
stackshot_cpu_signal_panic(void)632 stackshot_cpu_signal_panic(void)
633 {
634 	stackshot_state_t o_state;
635 	if (stackshot_active()) {
636 		/* Check if someone else panicked before we did. */
637 		o_state = os_atomic_xchg(&stackshot_ctx.sc_state, SS_PANICKED, seq_cst);
638 		if (o_state == SS_PANICKED) {
639 			stackshot_panic_spin();
640 		}
641 
642 		/* We're the first CPU to panic - wait for everyone to coalesce. */
643 		if (stackshot_cpu_ctx.scc_can_work) {
644 			stackshot_cpu_ctx.scc_can_work = false;
645 			os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
646 		}
647 		while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
648 			loop_wait_noguard();
649 		}
650 	}
651 }
652 
653 /*
654  * Sets the stackshot state to SS_ERRORED along with the error code.
655  * Only works if the current state is SS_RUNNING or SS_SETUP.
656  */
657 static inline void
stackshot_set_error(kern_return_t error)658 stackshot_set_error(kern_return_t error)
659 {
660 	stackshot_state_t cur_state;
661 	stackshot_state_t err_state = SS_MKERR(error);
662 	if (__improbable(!os_atomic_cmpxchgv(&stackshot_ctx.sc_state, SS_RUNNING, err_state, &cur_state, seq_cst))) {
663 		if (cur_state == SS_SETUP) {
664 			os_atomic_cmpxchg(&stackshot_ctx.sc_state, SS_SETUP, err_state, seq_cst);
665 		} else {
666 			/* Our state is something other than SS_RUNNING or SS_SETUP... Check for panic. */
667 			stackshot_panic_guard();
668 		}
669 	}
670 }
671 
672 /* Returns an error code if the current stackshot context has errored out.
673  * Also functions as a panic guard.
674  */
675 __result_use_check
676 static inline kern_return_t
stackshot_status_check(void)677 stackshot_status_check(void)
678 {
679 	stackshot_state_t state = os_atomic_load(&stackshot_ctx.sc_state, relaxed);
680 
681 	/* Check for panic */
682 	if (__improbable(SS_STATE(state) == SS_PANICKED)) {
683 		stackshot_panic_spin();
684 	}
685 
686 	/* Check for error */
687 	if (__improbable(SS_STATE(state) == SS_ERRORED)) {
688 		kern_return_t err = SS_ERRCODE(state);
689 		assert(err != KERN_SUCCESS); /* SS_ERRORED should always store an associated error code. */
690 		return err;
691 	}
692 
693 	return KERN_SUCCESS;
694 }
695 
696 #pragma mark ---Stackshot Tracing---
697 
698 #if STACKSHOT_COLLECTS_DIAGNOSTICS
699 static void
stackshot_trace(int line_no,mach_vm_address_t data)700 stackshot_trace(int line_no, mach_vm_address_t data)
701 {
702 	struct stackshot_trace_buffer *buffer = PERCPU_GET(stackshot_trace_buffer);
703 	buffer->sstb_entries[buffer->sstb_tail_idx] = (struct stackshot_trace_entry) {
704 		.sste_line_no = line_no,
705 		.sste_timestamp = mach_continuous_time(),
706 		.sste_data = data
707 	};
708 	buffer->sstb_tail_idx = (buffer->sstb_tail_idx + 1) % STACKSHOT_DEBUG_TRACEBUF_SIZE;
709 	buffer->sstb_size = MIN(buffer->sstb_size + 1, STACKSHOT_DEBUG_TRACEBUF_SIZE);
710 }
711 #define STACKSHOT_TRACE(data) stackshot_trace(__LINE__, (mach_vm_address_t) (data))
712 
713 #else /* STACKSHOT_COLLECTS_DIAGNOSTICS */
714 #define STACKSHOT_TRACE(data) ((void) data)
715 #endif /* !STACKSHOT_COLLECTS_DIAGNOSTICS */
716 
717 #pragma mark ---Stackshot Buffer Management---
718 
719 #define freelist_lock(buffer) while(!os_atomic_cmpxchg(&buffer->ssb_freelist_lock, 0, 1, acquire)) { loop_wait(); }
720 #define freelist_unlock(buffer) os_atomic_store(&buffer->ssb_freelist_lock, 0, release);
721 
722 /**
723  * Allocates some data from the shared stackshot buffer freelist.
724  * This should not be used directly, it is a last resort if we run out of space.
725  */
726 static void *
stackshot_freelist_alloc(size_t size,struct stackshot_buffer * buffer,kern_return_t * error)727 stackshot_freelist_alloc(
728 	size_t size,
729 	struct stackshot_buffer *buffer,
730 	kern_return_t *error)
731 {
732 	struct freelist_entry **cur_freelist, **best_freelist = NULL, *ret = NULL;
733 
734 	freelist_lock(buffer);
735 
736 	cur_freelist = &buffer->ssb_freelist;
737 
738 	while (*cur_freelist != NULL) {
739 		if (((*cur_freelist)->fl_size >= size) && ((best_freelist == NULL) || ((*best_freelist)->fl_size > (*cur_freelist)->fl_size))) {
740 			best_freelist = cur_freelist;
741 			if ((*best_freelist)->fl_size == size) {
742 				break;
743 			}
744 		}
745 		cur_freelist = &((*cur_freelist)->fl_next);
746 	}
747 
748 	/* If we found a freelist entry, update the freelist */
749 	if (best_freelist != NULL) {
750 		os_atomic_sub(&buffer->ssb_overhead, size, relaxed);
751 		ret = *best_freelist;
752 
753 		/* If there's enough unused space at the end of this entry, we should make a new one */
754 		if (((*best_freelist)->fl_size - size) > sizeof(struct freelist_entry)) {
755 			struct freelist_entry *new_freelist = (struct freelist_entry*) ((mach_vm_address_t) *best_freelist + size);
756 			*new_freelist = (struct freelist_entry) {
757 				.fl_next = (*best_freelist)->fl_next,
758 				.fl_size = (*best_freelist)->fl_size - size
759 			};
760 			(*best_freelist)->fl_next = new_freelist;
761 		}
762 
763 		/* Update previous entry with next or new entry */
764 		*best_freelist = (*best_freelist)->fl_next;
765 	}
766 
767 	freelist_unlock(buffer);
768 
769 	if (error != NULL) {
770 		if (ret == NULL) {
771 			*error = KERN_INSUFFICIENT_BUFFER_SIZE;
772 		} else {
773 			*error = KERN_SUCCESS;
774 		}
775 	}
776 
777 	return ret;
778 }
779 
780 /**
781  * Allocates some data from the shared stackshot buffer.
782  * Should not be used directly - see the `stackshot_alloc` and
783  * `stackshot_alloc_arr` macros.
784  */
785 static void *
stackshot_buffer_alloc(size_t size,struct stackshot_buffer * buffer,kern_return_t * error)786 stackshot_buffer_alloc(
787 	size_t size,
788 	struct stackshot_buffer *buffer,
789 	kern_return_t *error)
790 {
791 	size_t o_used, new_used;
792 
793 	stackshot_panic_guard();
794 	assert(!stackshot_ctx.sc_is_singlethreaded);
795 
796 	os_atomic_rmw_loop(&buffer->ssb_used, o_used, new_used, relaxed, {
797 		new_used = o_used + size;
798 		if (new_used > buffer->ssb_size) {
799 		        os_atomic_rmw_loop_give_up(return stackshot_freelist_alloc(size, buffer, error));
800 		}
801 	});
802 
803 	if (error != NULL) {
804 		*error = KERN_SUCCESS;
805 	}
806 
807 	return (void*) ((mach_vm_address_t) buffer->ssb_ptr + o_used);
808 }
809 
810 /**
811  * Finds the best stackshot buffer to use (prefer our cluster's buffer)
812  * and allocates from it.
813  * Should not be used directly - see the `stackshot_alloc` and
814  * `stackshot_alloc_arr` macros.
815  */
816 __result_use_check
817 static void *
stackshot_best_buffer_alloc(size_t size,kern_return_t * error)818 stackshot_best_buffer_alloc(size_t size, kern_return_t *error)
819 {
820 #if defined(__AMP__)
821 	kern_return_t err;
822 	int           my_cluster;
823 	void         *ret = NULL;
824 #endif /* __AMP__ */
825 
826 #if STACKSHOT_COLLECTS_LATENCY_INFO
827 	stackshot_cpu_latency.total_buf += size;
828 #endif
829 
830 #if defined(__AMP__)
831 	/* First, try our cluster's buffer */
832 	my_cluster = cpu_cluster_id();
833 	ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[my_cluster], &err);
834 
835 	/* Try other buffers now. */
836 	if (err != KERN_SUCCESS) {
837 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
838 			if (buf_idx == my_cluster) {
839 				continue;
840 			}
841 
842 			ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[buf_idx], &err);
843 			if (err == KERN_SUCCESS) {
844 #if STACKSHOT_COLLECTS_LATENCY_INFO
845 				stackshot_cpu_latency.intercluster_buf_used += size;
846 #endif
847 				break;
848 			}
849 		}
850 	}
851 
852 	if (error != NULL) {
853 		*error = err;
854 	}
855 
856 	return ret;
857 #else /* __AMP__ */
858 	return stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[0], error);
859 #endif /* !__AMP__ */
860 }
861 
862 /**
863  * Frees some data from the shared stackshot buffer and adds it to the freelist.
864  */
865 static void
stackshot_buffer_free(void * ptr,struct stackshot_buffer * buffer,size_t size)866 stackshot_buffer_free(
867 	void *ptr,
868 	struct stackshot_buffer *buffer,
869 	size_t size)
870 {
871 	stackshot_panic_guard();
872 
873 	/* This should never be called during a singlethreaded stackshot. */
874 	assert(!stackshot_ctx.sc_is_singlethreaded);
875 
876 	os_atomic_add(&buffer->ssb_overhead, size, relaxed);
877 
878 	/* Make sure we have enough space for the freelist entry */
879 	if (size < sizeof(struct freelist_entry)) {
880 		return;
881 	}
882 
883 	freelist_lock(buffer);
884 
885 	/* Create new freelist entry and push it to the front of the list */
886 	*((struct freelist_entry*) ptr) = (struct freelist_entry) {
887 		.fl_size = size,
888 		.fl_next = buffer->ssb_freelist
889 	};
890 	buffer->ssb_freelist = ptr;
891 
892 	freelist_unlock(buffer);
893 }
894 
895 /**
896  * Allocates some data from the stackshot buffer. Uses the bump allocator in
897  * multithreaded mode and endalloc in singlethreaded.
898  * err must ALWAYS be nonnull.
899  * Should not be used directly - see the macros in kern_stackshot.h.
900  */
901 void *
stackshot_alloc_with_size(size_t size,kern_return_t * err)902 stackshot_alloc_with_size(size_t size, kern_return_t *err)
903 {
904 	void *ptr;
905 	assert(err != NULL);
906 	assert(stackshot_active());
907 
908 	stackshot_panic_guard();
909 
910 	if (stackshot_ctx.sc_is_singlethreaded) {
911 		ptr = kcdata_endalloc(stackshot_kcdata_p, size);
912 		if (ptr == NULL) {
913 			*err = KERN_INSUFFICIENT_BUFFER_SIZE;
914 		}
915 	} else {
916 		ptr = stackshot_best_buffer_alloc(size, err);
917 		if (ptr == NULL) {
918 			/* We should always return an error if we return a null ptr */
919 			assert3u(*err, !=, KERN_SUCCESS);
920 		}
921 	}
922 
923 	return ptr;
924 }
925 
926 /**
927  * Initializes a new kcdata buffer somewhere in a linked kcdata list.
928  * Allocates a buffer for the kcdata from the shared stackshot buffer.
929  *
930  * See `linked_kcdata_alloc_callback` for the implementation details of
931  * linked kcdata for stackshot.
932  */
933 __result_use_check
934 static kern_return_t
linked_kcdata_init(linked_kcdata_descriptor_t descriptor,size_t min_size,unsigned int data_type,unsigned int flags)935 linked_kcdata_init(
936 	linked_kcdata_descriptor_t descriptor,
937 	size_t min_size,
938 	unsigned int data_type,
939 	unsigned int flags)
940 {
941 	void              *buf_ptr;
942 	kern_return_t      error;
943 	size_t             buf_size = MAX(min_size, stackshot_ctx.sc_min_kcdata_size);
944 
945 	buf_ptr = stackshot_alloc_arr(uint8_t, buf_size, &error);
946 	if (error != KERN_SUCCESS) {
947 		return error;
948 	}
949 
950 	error = kcdata_memory_static_init(&descriptor->kcdata, (mach_vm_address_t) buf_ptr, data_type, buf_size, flags);
951 	if (error != KERN_SUCCESS) {
952 		return error;
953 	}
954 
955 	descriptor->kcdata.kcd_alloc_callback = linked_kcdata_alloc_callback;
956 
957 	return KERN_SUCCESS;
958 }
959 
960 static void
stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)961 stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)
962 {
963 	/*
964 	 * If we have free space at the end of the kcdata, we can add it to the
965 	 * freelist. We always add to *our* cluster's freelist, no matter where
966 	 * the data was originally allocated.
967 	 *
968 	 * Important Note: We do not use kcdata_memory_get_used_bytes here because
969 	 * that includes extra space for the end tag (which we do not care about).
970 	 */
971 	int    buffer;
972 	size_t used_size = descriptor->kcd_addr_end - descriptor->kcd_addr_begin;
973 	size_t free_size = (descriptor->kcd_length - used_size);
974 	if (free_size > 0) {
975 #if defined(__arm64__)
976 		buffer = cpu_cluster_id();
977 #else /* __arm64__ */
978 		buffer = 0;
979 #endif /* !__arm64__ */
980 		stackshot_buffer_free((void*) descriptor->kcd_addr_end, &stackshot_ctx.sc_buffers[buffer], free_size);
981 		descriptor->kcd_length = used_size;
982 	}
983 }
984 
985 /**
986  * The callback for linked kcdata, which is called when one of the kcdata
987  * buffers runs out of space. This allocates a new kcdata descriptor &
988  * buffer in the linked list and sets it up.
989  *
990  * When kcdata calls this callback, it takes the returned descriptor
991  * and copies it to its own descriptor (which will be the per-cpu kcdata
992  * descriptor, in the case of stackshot).
993  *
994  * --- Stackshot linked kcdata details ---
995  * The way stackshot allocates kcdata buffers (in a non-panic context) is via
996  * a basic bump allocator (see `stackshot_buffer_alloc`) and a linked list of
997  * kcdata structures. The kcdata are allocated with a reasonable size based on
998  * some system heuristics (or more if whatever is being pushed into the buffer
999  * is larger). When the current kcdata buffer runs out of space, it calls this
1000  * callback, which allocates a new linked kcdata object at the tail of the
1001  * current list.
1002  *
1003  * The per-cpu `stackshot_kcdata_p` descriptor is the "tail" of the list, but
1004  * is not actually part of the linked list (this simplified implementation,
1005  * since it didn't require changing every kcdata call & a bunch of
1006  * kcdata code, since the current in-use descriptor is always in the same place
1007  * this way). When it is filled up and this callback is called, the
1008  * `stackshot_kcdata_p` descriptor is copied to the *actual* tail of the list
1009  * (in stackshot_cpu_ctx.scc_kcdata_tail), and a new linked kcdata struct is
1010  * allocated at the tail.
1011  */
1012 static kcdata_descriptor_t
linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor,size_t min_size)1013 linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size)
1014 {
1015 	kern_return_t error;
1016 	linked_kcdata_descriptor_t new_kcdata = NULL;
1017 
1018 	/* This callback should ALWAYS be coming from our per-cpu kcdata. If not, something has gone horribly wrong.*/
1019 	stackshot_panic_guard();
1020 	assert(descriptor == stackshot_kcdata_p);
1021 
1022 	/* Free the unused space in the buffer and copy it to the tail of the linked kcdata list. */
1023 	stackshot_kcdata_free_unused(descriptor);
1024 	stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *descriptor;
1025 
1026 	/* Allocate another linked_kcdata and initialize it. */
1027 	new_kcdata = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1028 	if (error != KERN_SUCCESS) {
1029 		return NULL;
1030 	}
1031 
1032 	/* It doesn't matter what we mark the data type as - we're throwing it away when weave the data together anyway. */
1033 	error = linked_kcdata_init(new_kcdata, min_size, KCDATA_BUFFER_BEGIN_STACKSHOT, descriptor->kcd_flags);
1034 	if (error != KERN_SUCCESS) {
1035 		return NULL;
1036 	}
1037 
1038 	bzero(descriptor, sizeof(struct kcdata_descriptor));
1039 	stackshot_cpu_ctx.scc_kcdata_tail->next = new_kcdata;
1040 	stackshot_cpu_ctx.scc_kcdata_tail = new_kcdata;
1041 
1042 	return &new_kcdata->kcdata;
1043 }
1044 
1045 /**
1046  * Allocates a new linked kcdata list for the current CPU and sets it up.
1047  * If there was a previous linked kcdata descriptor, you should call
1048  * `stackshot_finalize_linked_kcdata` first, or otherwise save it somewhere.
1049  */
1050 __result_use_check
1051 static kern_return_t
stackshot_new_linked_kcdata(void)1052 stackshot_new_linked_kcdata(void)
1053 {
1054 	kern_return_t error;
1055 
1056 	stackshot_panic_guard();
1057 	assert(!stackshot_ctx.sc_panic_stackshot);
1058 
1059 	stackshot_cpu_ctx.scc_kcdata_head = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1060 	if (error != KERN_SUCCESS) {
1061 		return error;
1062 	}
1063 
1064 	kcd_exit_on_error(linked_kcdata_init(stackshot_cpu_ctx.scc_kcdata_head, 0,
1065 	    KCDATA_BUFFER_BEGIN_STACKSHOT,
1066 	    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER | KCFLAG_ALLOC_CALLBACK));
1067 
1068 	stackshot_cpu_ctx.scc_kcdata_tail = stackshot_cpu_ctx.scc_kcdata_head;
1069 	*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_head->kcdata;
1070 
1071 error_exit:
1072 	return error;
1073 }
1074 
1075 /**
1076  * Finalizes the current linked kcdata structure for the CPU by updating the
1077  * tail of the list with the per-cpu kcdata descriptor.
1078  */
1079 static void
stackshot_finalize_linked_kcdata(void)1080 stackshot_finalize_linked_kcdata(void)
1081 {
1082 	stackshot_panic_guard();
1083 	assert(!stackshot_ctx.sc_panic_stackshot);
1084 	stackshot_kcdata_free_unused(stackshot_kcdata_p);
1085 	if (stackshot_cpu_ctx.scc_kcdata_tail != NULL) {
1086 		stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *stackshot_kcdata_p;
1087 	}
1088 	*stackshot_kcdata_p = (struct kcdata_descriptor){};
1089 }
1090 
1091 /*
1092  * Initialize the mutex governing access to the stack snapshot subsystem
1093  * and other stackshot related bits.
1094  */
1095 __private_extern__ void
stackshot_init(void)1096 stackshot_init(void)
1097 {
1098 	mach_timebase_info_data_t timebase;
1099 
1100 	clock_timebase_info(&timebase);
1101 	stackshot_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
1102 
1103 	max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
1104 
1105 	PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
1106 }
1107 
1108 /*
1109  * Called with interrupts disabled after stackshot context has been
1110  * initialized.
1111  */
1112 static kern_return_t
stackshot_trap(void)1113 stackshot_trap(void)
1114 {
1115 	kern_return_t   rv;
1116 
1117 #if defined(__x86_64__)
1118 	/*
1119 	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
1120 	 * operation, it's essential to apply mutual exclusion to the other when one
1121 	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
1122 	 * capture CPUs.
1123 	 *
1124 	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
1125 	 * allowed, so we check to ensure there there is no rendezvous in progress before
1126 	 * trying to grab the lock (if there is, a deadlock will occur when we try to
1127 	 * grab the lock).  This is accomplished by setting cpu_rendezvous_in_progress to
1128 	 * TRUE in the mp rendezvous action function.  If stackshot_trap() is called by
1129 	 * a subordinate of the call chain within the mp rendezvous action, this flag will
1130 	 * be set and can be used to detect the inevitable deadlock that would occur
1131 	 * if this thread tried to grab the rendezvous lock.
1132 	 */
1133 
1134 	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
1135 		panic("Calling stackshot from a rendezvous is not allowed!");
1136 	}
1137 
1138 	mp_rendezvous_lock();
1139 #endif
1140 
1141 	stackshot_stats.last_abs_start = mach_absolute_time();
1142 	stackshot_stats.last_abs_end = 0;
1143 
1144 	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0, NULL);
1145 
1146 	stackshot_stats.last_abs_end = mach_absolute_time();
1147 	stackshot_stats.stackshots_taken++;
1148 	stackshot_stats.stackshots_duration += (stackshot_stats.last_abs_end - stackshot_stats.last_abs_start);
1149 
1150 #if defined(__x86_64__)
1151 	mp_rendezvous_unlock();
1152 #endif
1153 	return rv;
1154 }
1155 
1156 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
1157 void
stackshot_get_timing(uint64_t * last_abs_start,uint64_t * last_abs_end,uint64_t * count,uint64_t * total_duration)1158 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
1159 {
1160 	STACKSHOT_SUBSYS_LOCK();
1161 	*last_abs_start = stackshot_stats.last_abs_start;
1162 	*last_abs_end = stackshot_stats.last_abs_end;
1163 	*count = stackshot_stats.stackshots_taken;
1164 	*total_duration = stackshot_stats.stackshots_duration;
1165 	STACKSHOT_SUBSYS_UNLOCK();
1166 }
1167 
1168 kern_return_t
stack_snapshot_from_kernel(int pid,void * buf,uint32_t size,uint64_t flags,uint64_t delta_since_timestamp,uint32_t pagetable_mask,unsigned * bytes_traced)1169 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
1170 {
1171 	kern_return_t error = KERN_SUCCESS;
1172 	boolean_t istate;
1173 	struct kdp_snapshot_args args;
1174 
1175 	args = (struct kdp_snapshot_args) {
1176 		.pid =               pid,
1177 		.buffer =            buf,
1178 		.buffer_size =       size,
1179 		.flags =             flags,
1180 		.since_timestamp =   delta_since_timestamp,
1181 		.pagetable_mask =    pagetable_mask
1182 	};
1183 
1184 #if DEVELOPMENT || DEBUG
1185 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1186 		return KERN_NOT_SUPPORTED;
1187 	}
1188 #endif
1189 	if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
1190 		return KERN_INVALID_ARGUMENT;
1191 	}
1192 
1193 	/* zero caller's buffer to match KMA_ZERO in other path */
1194 	bzero(buf, size);
1195 
1196 	/* cap in individual stackshot to max_tracebuf_size */
1197 	if (size > max_tracebuf_size) {
1198 		size = max_tracebuf_size;
1199 	}
1200 
1201 	/* Serialize tracing */
1202 	if (flags & STACKSHOT_TRYLOCK) {
1203 		if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
1204 			return KERN_LOCK_OWNED;
1205 		}
1206 	} else {
1207 		STACKSHOT_SUBSYS_LOCK();
1208 	}
1209 
1210 #if CONFIG_EXCLAVES
1211 	assert(!stackshot_exclave_inspect_ctids);
1212 #endif
1213 
1214 	stackshot_initial_estimate = 0;
1215 	stackshot_duration_prior_abs = 0;
1216 	stackshot_duration_outer = NULL;
1217 
1218 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
1219 	    flags, size, pid, delta_since_timestamp);
1220 
1221 	/* Prepare the compressor for a stackshot */
1222 	error = vm_compressor_kdp_init();
1223 	if (error != KERN_SUCCESS) {
1224 		return error;
1225 	}
1226 
1227 	istate = ml_set_interrupts_enabled(FALSE);
1228 	uint64_t time_start      = mach_absolute_time();
1229 
1230 	/* Emit a SOCD tracepoint that we are initiating a stackshot */
1231 	SOCD_TRACE_XNU_START(STACKSHOT);
1232 
1233 	/* Preload trace parameters*/
1234 	error = kdp_snapshot_preflight_internal(args);
1235 
1236 	/*
1237 	 * Trap to the debugger to obtain a coherent stack snapshot; this populates
1238 	 * the trace buffer
1239 	 */
1240 	if (error == KERN_SUCCESS) {
1241 		error = stackshot_trap();
1242 	}
1243 
1244 	uint64_t time_end = mach_absolute_time();
1245 
1246 	/* Emit a SOCD tracepoint that we have completed the stackshot */
1247 	SOCD_TRACE_XNU_END(STACKSHOT);
1248 
1249 	ml_set_interrupts_enabled(istate);
1250 
1251 #if CONFIG_EXCLAVES
1252 	/* stackshot trap should only finish successfully or with no pending Exclave threads */
1253 	assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1254 #endif
1255 
1256 	/*
1257 	 * Stackshot is no longer active.
1258 	 * (We have to do this here for the special interrupt disable timeout case to work)
1259 	 */
1260 	os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
1261 
1262 	/* Release kdp compressor buffers */
1263 	vm_compressor_kdp_teardown();
1264 
1265 	/* Collect multithreaded kcdata into one finalized buffer */
1266 	if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
1267 		error = stackshot_collect_kcdata();
1268 	}
1269 
1270 #if CONFIG_EXCLAVES
1271 	if (stackshot_exclave_inspect_ctids) {
1272 		error = collect_exclave_threads(flags);
1273 	}
1274 #endif /* CONFIG_EXCLAVES */
1275 
1276 	if (error == KERN_SUCCESS) {
1277 		if (!stackshot_ctx.sc_is_singlethreaded) {
1278 			error = stackshot_finalize_kcdata();
1279 		} else {
1280 			error = stackshot_finalize_singlethreaded_kcdata();
1281 		}
1282 	}
1283 
1284 	if (stackshot_duration_outer) {
1285 		*stackshot_duration_outer = time_end - time_start;
1286 	}
1287 	*bytes_traced = kdp_stack_snapshot_bytes_traced();
1288 
1289 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
1290 	    error, (time_end - time_start), size, *bytes_traced);
1291 
1292 	STACKSHOT_SUBSYS_UNLOCK();
1293 	return error;
1294 }
1295 
#if CONFIG_TELEMETRY
/*
 * Microstackshot entry point.
 *
 * Control requests (global enable / disable) are handled first and set
 * *retval to 0. Otherwise, with STACKSHOT_GET_MICROSTACKSHOT set, telemetry
 * data is gathered into `tracebuf` under the stackshot subsystem lock and
 * *retval receives the byte count reported by telemetry_gather. In every
 * other case *retval stays -1.
 *
 * @returns KERN_SUCCESS, KERN_INVALID_ARGUMENT for a NULL/empty buffer or a
 *          buffer larger than max_tracebuf_size, or the result of
 *          telemetry_gather.
 */
kern_return_t
stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
{
	int error = KERN_SUCCESS;
	uint32_t bytes_traced = 0;

	*retval = -1;

	/*
	 * Control related operations
	 */
	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
		telemetry_global_ctl(1);
		*retval = 0;
		return error;
	}
	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
		telemetry_global_ctl(0);
		*retval = 0;
		return error;
	}

	/*
	 * Data related operations
	 */
	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
		return KERN_INVALID_ARGUMENT;
	}

	STACKSHOT_SUBSYS_LOCK();

	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
		if (tracebuf_size > max_tracebuf_size) {
			error = KERN_INVALID_ARGUMENT;
		} else {
			/* In: capacity of tracebuf. Out: value reported back through *retval. */
			bytes_traced = tracebuf_size;
			error = telemetry_gather(tracebuf, &bytes_traced,
			    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
			*retval = (int)bytes_traced;
		}
	}

	STACKSHOT_SUBSYS_UNLOCK();
	return error;
}
#endif /* CONFIG_TELEMETRY */
1349 
1350 /**
1351  * Grabs the next work item from the stackshot work queue.
1352  */
1353 static struct stackshot_workitem *
stackshot_get_workitem(struct stackshot_workqueue * queue)1354 stackshot_get_workitem(struct stackshot_workqueue *queue)
1355 {
1356 	uint32_t old_count, new_count;
1357 
1358 	/* note: this relies on give_up not performing the write, just bailing out immediately */
1359 	os_atomic_rmw_loop(&queue->sswq_cur_item, old_count, new_count, acq_rel, {
1360 		if (old_count >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1361 		        os_atomic_rmw_loop_give_up(return NULL);
1362 		}
1363 		new_count = old_count + 1;
1364 	});
1365 
1366 	return &queue->sswq_items[old_count];
1367 };
1368 
1369 /**
1370  * Puts an item on the appropriate stackshot work queue.
1371  * We don't need the lock for this, but only because it's
1372  * only called by one writer..
1373  *
1374  * @returns
1375  * true if the item fit in the queue, false if not.
1376  */
1377 static kern_return_t
stackshot_put_workitem(struct stackshot_workitem item)1378 stackshot_put_workitem(struct stackshot_workitem item)
1379 {
1380 	struct stackshot_workqueue *queue;
1381 
1382 	/* Put in higher queue if task has more threads, with highest queue having >= STACKSHOT_HARDEST_THREADCOUNT threads */
1383 	size_t queue_idx = ((item.sswi_task->thread_count * (STACKSHOT_NUM_WORKQUEUES - 1)) / STACKSHOT_HARDEST_THREADCOUNT);
1384 	queue_idx = MIN(queue_idx, STACKSHOT_NUM_WORKQUEUES - 1);
1385 
1386 	queue = &stackshot_ctx.sc_workqueues[queue_idx];
1387 
1388 	size_t num_items = os_atomic_load(&queue->sswq_num_items, relaxed);
1389 
1390 	if (num_items >= queue->sswq_capacity) {
1391 		return KERN_INSUFFICIENT_BUFFER_SIZE;
1392 	}
1393 
1394 	queue->sswq_items[num_items] = item;
1395 	os_atomic_inc(&queue->sswq_num_items, release);
1396 
1397 	return KERN_SUCCESS;
1398 }
1399 
/* Number of kcdata buffers of `kcdata_size` bytes needed for `size` bytes (ceiling division; `size` must be non-zero). */
#define calc_num_linked_kcdata_frames(size, kcdata_size) ((((size) - 1) / (kcdata_size)) + 1)
/* Total bytes for those buffers, counting one linked_kcdata_descriptor per buffer. */
#define calc_linked_kcdata_size(size, kcdata_size) ((sizeof(struct linked_kcdata_descriptor) + (kcdata_size)) * calc_num_linked_kcdata_frames((size), (kcdata_size)))

#define TASK_UUID_AVG_SIZE (sizeof(uuid_t) * 16) /* Average space consumed by UUIDs/task */
#define TASK_SHARED_CACHE_AVG_SIZE (128) /* Average space consumed by task shared cache info */
/*
 * sizeof(a) when `flag` is set in the `trace_flags` variable of the enclosing
 * scope, else 0. `a` must be a type or an object: passing a size expression
 * yields the size of that expression's type, not its value.
 */
#define sizeof_if_traceflag(a, flag) (((trace_flags & (flag)) == 0) ? 0 : sizeof(a))

/* `size` grown by `adj` percent (integer arithmetic, rounds down). */
#define FUDGED_SIZE(size, adj) (((size) * (100 + (adj))) / 100)
1408 
1409 /*
1410  * Return the estimated size of a single task (including threads)
1411  * in a stackshot with the given flags.
1412  */
1413 static uint32_t
get_stackshot_est_tasksize(uint64_t trace_flags)1414 get_stackshot_est_tasksize(uint64_t trace_flags)
1415 {
1416 	size_t total_size;
1417 	size_t threads_per_task = (((threads_count + terminated_threads_count) - 1) / (tasks_count + terminated_tasks_count)) + 1;
1418 	size_t est_thread_size = sizeof(struct thread_snapshot_v4) + 42 * sizeof(uintptr_t);
1419 	size_t est_task_size = sizeof(struct task_snapshot_v2) +
1420 	    TASK_UUID_AVG_SIZE +
1421 	    TASK_SHARED_CACHE_AVG_SIZE +
1422 	    sizeof_if_traceflag(struct io_stats_snapshot, STACKSHOT_INSTRS_CYCLES) +
1423 	    sizeof_if_traceflag(uint32_t, STACKSHOT_ASID) +
1424 	    sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ, STACKSHOT_PAGE_TABLES) +
1425 	    sizeof_if_traceflag(struct instrs_cycles_snapshot_v2, STACKSHOT_INSTRS_CYCLES) +
1426 	    sizeof(struct stackshot_cpu_architecture) +
1427 	    sizeof(struct stackshot_task_codesigning_info);
1428 
1429 #if STACKSHOT_COLLECTS_LATENCY_INFO
1430 	if (collect_latency_info) {
1431 		est_thread_size += sizeof(struct stackshot_latency_thread);
1432 		est_task_size += sizeof(struct stackshot_latency_task);
1433 	}
1434 #endif
1435 
1436 	total_size = est_task_size + threads_per_task * est_thread_size;
1437 
1438 	return total_size;
1439 }
1440 
1441 /*
1442  * Return the estimated size of a stackshot based on the
1443  * number of currently running threads and tasks.
1444  *
1445  * adj is an adjustment in units of percentage
1446  */
1447 static uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj,uint64_t trace_flags,pid_t target_pid)1448 get_stackshot_estsize(
1449 	uint32_t prev_size_hint,
1450 	uint32_t adj,
1451 	uint64_t trace_flags,
1452 	pid_t target_pid)
1453 {
1454 	vm_size_t thread_and_task_total;
1455 	uint64_t  size;
1456 	uint32_t  estimated_size;
1457 	bool      process_scoped = ((target_pid != -1) && ((trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0));
1458 
1459 	/*
1460 	 * We use the estimated task size (with a fudge factor) as the default
1461 	 * linked kcdata buffer size in an effort to reduce overhead (ideally, we want
1462 	 * each task to only need a single kcdata buffer.)
1463 	 */
1464 	uint32_t est_task_size = get_stackshot_est_tasksize(trace_flags);
1465 	uint32_t est_kcdata_size = FUDGED_SIZE(est_task_size, adj);
1466 	uint64_t est_preamble_size = calc_linked_kcdata_size(8192 * 4, est_kcdata_size);
1467 	uint64_t est_postamble_size = calc_linked_kcdata_size(8192 * 2, est_kcdata_size);
1468 	uint64_t est_extra_size = 0;
1469 
1470 	adj = MIN(adj, 100u);   /* no more than double our estimate */
1471 
1472 #if STACKSHOT_COLLECTS_LATENCY_INFO
1473 	est_extra_size += real_ncpus * sizeof(struct stackshot_latency_cpu);
1474 	est_extra_size += sizeof(struct stackshot_latency_collection_v2);
1475 #endif
1476 
1477 	est_extra_size += real_ncpus * MAX_FRAMES * sizeof(uintptr_t); /* Stacktrace buffers */
1478 	est_extra_size += FUDGED_SIZE(tasks_count, 10) * sizeof(uintptr_t) * STACKSHOT_NUM_WORKQUEUES; /* Work queues */
1479 	est_extra_size += sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ * real_ncpus, STACKSHOT_PAGE_TABLES);
1480 
1481 	thread_and_task_total = calc_linked_kcdata_size(est_task_size, est_kcdata_size);
1482 	if (!process_scoped) {
1483 		thread_and_task_total *= tasks_count;
1484 	}
1485 	size = thread_and_task_total + est_preamble_size + est_postamble_size + est_extra_size; /* estimate */
1486 	size = FUDGED_SIZE(size, adj); /* add adj */
1487 	size = MAX(size, prev_size_hint); /* allow hint to increase */
1488 	size += stackshot_plh_est_size(); /* add space for the port label hash */
1489 	size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
1490 	estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
1491 
1492 	return estimated_size;
1493 }
1494 
1495 /**
1496  * Copies a linked list of kcdata structures into a final kcdata structure.
1497  * Only used from stackshot_finalize_kcdata.
1498  */
1499 __result_use_check
1500 static kern_return_t
stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata,linked_kcdata_descriptor_t linked_kcdata)1501 stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata, linked_kcdata_descriptor_t linked_kcdata)
1502 {
1503 	kern_return_t error = KERN_SUCCESS;
1504 
1505 	while (linked_kcdata) {
1506 		/* Walk linked kcdata list */
1507 		kcdata_descriptor_t cur_kcdata = &linked_kcdata->kcdata;
1508 		if ((cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin) == 0) {
1509 			linked_kcdata = linked_kcdata->next;
1510 			continue;
1511 		}
1512 
1513 		/* Every item in the linked kcdata should have a header tag of type KCDATA_BUFFER_BEGIN_STACKSHOT. */
1514 		assert(((struct kcdata_item*) cur_kcdata->kcd_addr_begin)->type == KCDATA_BUFFER_BEGIN_STACKSHOT);
1515 		assert((final_kcdata->kcd_addr_begin + final_kcdata->kcd_length) > final_kcdata->kcd_addr_end);
1516 		size_t header_size = sizeof(kcdata_item_t) + kcdata_calc_padding(sizeof(kcdata_item_t));
1517 		size_t size = cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin - header_size;
1518 		size_t free = (final_kcdata->kcd_length + final_kcdata->kcd_addr_begin) - final_kcdata->kcd_addr_end;
1519 		if (free < size) {
1520 			error = KERN_INSUFFICIENT_BUFFER_SIZE;
1521 			goto error_exit;
1522 		}
1523 
1524 		/* Just memcpy the data over (and compress if we need to.) */
1525 		kcdata_compression_window_open(final_kcdata);
1526 		error = kcdata_memcpy(final_kcdata, final_kcdata->kcd_addr_end, (void*) (cur_kcdata->kcd_addr_begin + header_size), size);
1527 		if (error != KERN_SUCCESS) {
1528 			goto error_exit;
1529 		}
1530 		final_kcdata->kcd_addr_end += size;
1531 		kcdata_compression_window_close(final_kcdata);
1532 
1533 		linked_kcdata = linked_kcdata->next;
1534 	}
1535 
1536 error_exit:
1537 	return error;
1538 }
1539 
1540 /**
1541  * Copies the duration, latency, and diagnostic info into a final kcdata buffer.
1542  * Only used by stackshot_finalize_kcdata and stackshot_finalize_singlethreaded_kcdata.
1543  */
1544 __result_use_check
1545 static kern_return_t
stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)1546 stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)
1547 {
1548 	kern_return_t error;
1549 	mach_vm_address_t out_addr;
1550 	bool use_fault_path = ((stackshot_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
1551 #if STACKSHOT_COLLECTS_LATENCY_INFO
1552 	size_t            buffer_used = 0;
1553 	size_t            buffer_overhead = 0;
1554 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1555 
1556 	if (use_fault_path) {
1557 		struct stackshot_fault_stats stats = (struct stackshot_fault_stats) {
1558 			.sfs_pages_faulted_in = 0,
1559 			.sfs_time_spent_faulting = 0,
1560 			.sfs_system_max_fault_time = stackshot_max_fault_time,
1561 			.sfs_stopped_faulting = false
1562 		};
1563 		percpu_foreach_base(base) {
1564 			struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
1565 			if (!cpu_ctx->scc_did_work) {
1566 				continue;
1567 			}
1568 			stats.sfs_pages_faulted_in += cpu_ctx->scc_fault_stats.sfs_pages_faulted_in;
1569 			stats.sfs_time_spent_faulting += cpu_ctx->scc_fault_stats.sfs_time_spent_faulting;
1570 			stats.sfs_stopped_faulting = stats.sfs_stopped_faulting || cpu_ctx->scc_fault_stats.sfs_stopped_faulting;
1571 		}
1572 		kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
1573 		    sizeof(struct stackshot_fault_stats), &stats);
1574 	}
1575 
1576 #if STACKSHOT_COLLECTS_LATENCY_INFO
1577 	int num_working_cpus = 0;
1578 	if (collect_latency_info) {
1579 		/* Add per-CPU latency info */
1580 		percpu_foreach(cpu_ctx, stackshot_cpu_ctx_percpu) {
1581 			if (cpu_ctx->scc_did_work) {
1582 				num_working_cpus++;
1583 			}
1584 		}
1585 		kcdata_compression_window_open(kcdata);
1586 		kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1587 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_CPU, sizeof(struct stackshot_latency_cpu), num_working_cpus, &out_addr));
1588 		percpu_foreach_base(base) {
1589 			if (PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu)->scc_did_work) {
1590 				kcdata_memcpy(kcdata, out_addr, PERCPU_GET_WITH_BASE(base, stackshot_cpu_latency_percpu),
1591 				    sizeof(struct stackshot_latency_cpu));
1592 				out_addr += sizeof(struct stackshot_latency_cpu);
1593 			}
1594 		}
1595 		kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1596 
1597 		/* Add up buffer info */
1598 		for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
1599 			struct stackshot_buffer *buf = &stackshot_ctx.sc_buffers[buf_idx];
1600 			buffer_used += os_atomic_load(&buf->ssb_used, relaxed);
1601 			buffer_overhead += os_atomic_load(&buf->ssb_overhead, relaxed);
1602 		}
1603 		stackshot_ctx.sc_latency.buffer_size = stackshot_ctx.sc_args.buffer_size;
1604 		stackshot_ctx.sc_latency.buffer_overhead = buffer_overhead;
1605 		stackshot_ctx.sc_latency.buffer_used = buffer_used;
1606 		stackshot_ctx.sc_latency.buffer_count = stackshot_ctx.sc_num_buffers;
1607 
1608 		/* Add overall latency info */
1609 		kcd_exit_on_error(kcdata_push_data(
1610 			    kcdata, STACKSHOT_KCTYPE_LATENCY_INFO,
1611 			    sizeof(stackshot_ctx.sc_latency), &stackshot_ctx.sc_latency));
1612 	}
1613 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1614 
1615 	if ((stackshot_flags & STACKSHOT_DO_COMPRESS) == 0) {
1616 		assert(!stackshot_ctx.sc_panic_stackshot);
1617 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
1618 		    sizeof(struct stackshot_duration_v2), &out_addr));
1619 		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
1620 		memcpy(duration_p, &stackshot_ctx.sc_duration, sizeof(*duration_p));
1621 		stackshot_duration_outer = (unaligned_u64 *) &duration_p->stackshot_duration_outer;
1622 		kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_tries, "stackshot_tries"));
1623 	} else {
1624 		kcd_exit_on_error(kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_ctx.sc_duration), &stackshot_ctx.sc_duration));
1625 		stackshot_duration_outer = NULL;
1626 	}
1627 
1628 error_exit:
1629 	return error;
1630 }
1631 
1632 /**
1633  * Allocates the final kcdata buffer for a mulitithreaded stackshot,
1634  * where all of the per-task kcdata (and exclave kcdata) will end up.
1635  */
1636 __result_use_check
1637 static kern_return_t
stackshot_alloc_final_kcdata(void)1638 stackshot_alloc_final_kcdata(void)
1639 {
1640 	vm_offset_t   final_kcdata_buffer = 0;
1641 	kern_return_t error = KERN_SUCCESS;
1642 	uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1643 	    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1644 	    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1645 
1646 	if (stackshot_ctx.sc_is_singlethreaded) {
1647 		return KERN_SUCCESS;
1648 	}
1649 
1650 	if ((error = kmem_alloc(kernel_map, &final_kcdata_buffer, stackshot_args.buffer_size,
1651 	    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
1652 		os_log_error(OS_LOG_DEFAULT, "stackshot: final allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, stackshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
1653 		return KERN_RESOURCE_SHORTAGE;
1654 	}
1655 
1656 	stackshot_ctx.sc_finalized_kcdata = kcdata_memory_alloc_init(final_kcdata_buffer, hdr_tag,
1657 	    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1658 
1659 	if (stackshot_ctx.sc_finalized_kcdata == NULL) {
1660 		kmem_free(kernel_map, final_kcdata_buffer, stackshot_args.buffer_size);
1661 		return KERN_FAILURE;
1662 	}
1663 
1664 	return KERN_SUCCESS;
1665 }
1666 
1667 /**
1668  * Frees the final kcdata buffer.
1669  */
1670 static void
stackshot_free_final_kcdata(void)1671 stackshot_free_final_kcdata(void)
1672 {
1673 	if (stackshot_ctx.sc_is_singlethreaded || (stackshot_ctx.sc_finalized_kcdata == NULL)) {
1674 		return;
1675 	}
1676 
1677 	kmem_free(kernel_map, stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1678 	kcdata_memory_destroy(stackshot_ctx.sc_finalized_kcdata);
1679 	stackshot_ctx.sc_finalized_kcdata = NULL;
1680 }
1681 
1682 /**
1683  * Called once we exit the debugger trap to collate all of the separate linked
1684  * kcdata lists into one kcdata buffer. The calling thread will run this, and
1685  * it is guaranteed that nobody else is touching any stackshot state at this
1686  * point. In the case of a panic stackshot, this is never called since we only
1687  * use one thread.
1688  *
1689  * Called with interrupts enabled, stackshot subsys lock held.
1690  */
1691 __result_use_check
1692 static kern_return_t
stackshot_collect_kcdata(void)1693 stackshot_collect_kcdata(void)
1694 {
1695 	kern_return_t error = 0;
1696 	uint32_t      hdr_tag;
1697 
1698 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1699 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1700 
1701 	/* Allocate our final kcdata buffer. */
1702 	kcd_exit_on_error(stackshot_alloc_final_kcdata());
1703 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1704 
1705 	/* Setup compression if we need it. */
1706 	if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
1707 		hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1708 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
1709 		kcd_exit_on_error(kcdata_init_compress(stackshot_ctx.sc_finalized_kcdata, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
1710 	}
1711 
1712 	/* Copy over all of the pre task-iteration kcdata (to preserve order as if it were single-threaded) */
1713 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_pretask_kcdata));
1714 
1715 	/* Set each queue's cur_item to 0. */
1716 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1717 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_cur_item, 0, relaxed);
1718 	}
1719 
1720 	/*
1721 	 * Iterate over work queue(s) and copy the kcdata in.
1722 	 */
1723 	while (true) {
1724 		struct stackshot_workitem  *next_item = NULL;
1725 		struct stackshot_workqueue *next_queue = NULL;
1726 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1727 			struct stackshot_workqueue *queue = &stackshot_ctx.sc_workqueues[i];
1728 			size_t cur_item = os_atomic_load(&queue->sswq_cur_item, relaxed);
1729 
1730 			/* Check if we're done with this queue */
1731 			if (cur_item >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1732 				continue;
1733 			}
1734 
1735 			/* Check if this workitem should come next */
1736 			struct stackshot_workitem *item = &queue->sswq_items[cur_item];
1737 			if ((next_item == NULL) || (next_item->sswi_idx > item->sswi_idx)) {
1738 				next_item = item;
1739 				next_queue = queue;
1740 			}
1741 		}
1742 
1743 		/* Queues are empty. */
1744 		if (next_item == NULL) {
1745 			break;
1746 		}
1747 
1748 		assert(next_queue);
1749 		assert(next_item->sswi_data != NULL);
1750 
1751 		os_atomic_inc(&next_queue->sswq_cur_item, relaxed);
1752 		kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, next_item->sswi_data));
1753 	}
1754 
1755 	/* Write post-task kcdata */
1756 	kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_posttask_kcdata));
1757 error_exit:
1758 	if (error != KERN_SUCCESS) {
1759 		stackshot_free_final_kcdata();
1760 	}
1761 	return error;
1762 }
1763 
1764 
1765 /**
1766  * Called at the very end of stackshot data generation, to write final timing
1767  * data to the kcdata structure and close compression. Only called for
1768  * multi-threaded stackshots; see stackshot_finalize_singlethreaded_kcata for
1769  * single-threaded variant.
1770  *
1771  * Called with interrupts enabled, stackshot subsys lock held.
1772  */
1773 __result_use_check
1774 static kern_return_t
stackshot_finalize_kcdata(void)1775 stackshot_finalize_kcdata(void)
1776 {
1777 	kern_return_t error = 0;
1778 
1779 	assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1780 	LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1781 
1782 	assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1783 
1784 	/* Write stackshot timing info */
1785 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1786 
1787 	/* Note: exactly 0 or 1 call to something pushing more data can be called after kcd_finalize_compression */
1788 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1789 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1790 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1791 
1792 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1793 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1794 
1795 	if (os_atomic_load(&stackshot_ctx.sc_retval, relaxed) == KERN_SUCCESS) {
1796 		/* releases and zeros done */
1797 		kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1798 	}
1799 
1800 	memcpy(stackshot_args.buffer, (void*) stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1801 
1802 	/* Fix duration_outer offset */
1803 	if (stackshot_duration_outer != NULL) {
1804 		stackshot_duration_outer = (unaligned_u64*) ((mach_vm_address_t) stackshot_args.buffer + ((mach_vm_address_t) stackshot_duration_outer - stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin));
1805 	}
1806 
1807 error_exit:
1808 	stackshot_free_final_kcdata();
1809 	return error;
1810 }
1811 
1812 /**
1813  * Finalizes the kcdata for a singlethreaded stackshot.
1814  *
1815  * May be called from interrupt/panic context.
1816  */
1817 __result_use_check
1818 static kern_return_t
stackshot_finalize_singlethreaded_kcdata(void)1819 stackshot_finalize_singlethreaded_kcdata(void)
1820 {
1821 	kern_return_t error;
1822 
1823 	assert(stackshot_ctx.sc_is_singlethreaded);
1824 
1825 	kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1826 	/* Note: exactly 0 or 1 call to something pushing more data can be called after kcd_finalize_compression */
1827 	kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1828 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1829 	kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1830 
1831 	stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1832 	stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1833 
1834 	kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1835 
1836 	if (stackshot_ctx.sc_panic_stackshot) {
1837 		*stackshot_args.descriptor = *stackshot_ctx.sc_finalized_kcdata;
1838 	}
1839 
1840 error_exit:
1841 	return error;
1842 }
1843 
1844 /*
1845  * stackshot_remap_buffer:	Utility function to remap bytes_traced bytes starting at stackshotbuf
1846  *				into the current task's user space and subsequently copy out the address
1847  *				at which the buffer has been mapped in user space to out_buffer_addr.
1848  *
1849  * Inputs:			stackshotbuf - pointer to the original buffer in the kernel's address space
1850  *				bytes_traced - length of the buffer to remap starting from stackshotbuf
1851  *				out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
1852  *				out_size_addr - pointer to be filled in with the size of the buffer
1853  *
1854  * Outputs:			ENOSPC if there is not enough free space in the task's address space to remap the buffer
1855  *				EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
1856  *				an error from copyout
1857  */
1858 static kern_return_t
stackshot_remap_buffer(void * stackshotbuf,uint32_t bytes_traced,uint64_t out_buffer_addr,uint64_t out_size_addr)1859 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
1860 {
1861 	int                     error = 0;
1862 	mach_vm_offset_t        stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
1863 	vm_prot_t               cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
1864 
1865 	error = mach_vm_remap(current_map(), &stackshotbuf_user_addr, bytes_traced, 0,
1866 	    VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE,
1867 	    &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
1868 	/*
1869 	 * If the call to mach_vm_remap fails, we return the appropriate converted error
1870 	 */
1871 	if (error == KERN_SUCCESS) {
1872 		/* If the user addr somehow didn't get set, we should make sure that we fail, and (eventually)
1873 		 * panic on development kernels to find out why
1874 		 */
1875 		if (stackshotbuf_user_addr == (mach_vm_offset_t)NULL) {
1876 #if DEVELOPMENT || DEBUG
1877 			os_log_error(OS_LOG_DEFAULT, "stackshot: mach_vm_remap succeeded with NULL\n");
1878 #endif // DEVELOPMENT || DEBUG
1879 			return KERN_FAILURE;
1880 		}
1881 
1882 		/*
1883 		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
1884 		 * we just made in the task's user space.
1885 		 */
1886 		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
1887 		if (error != KERN_SUCCESS) {
1888 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1889 			return error;
1890 		}
1891 		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
1892 		if (error != KERN_SUCCESS) {
1893 			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1894 			return error;
1895 		}
1896 	}
1897 	return error;
1898 }
1899 
1900 #if CONFIG_EXCLAVES
1901 
1902 static kern_return_t
stackshot_setup_exclave_waitlist(void)1903 stackshot_setup_exclave_waitlist(void)
1904 {
1905 	kern_return_t error = KERN_SUCCESS;
1906 	size_t exclave_threads_max = exclaves_ipc_buffer_count();
1907 	size_t waitlist_size = 0;
1908 
1909 	assert(!stackshot_exclave_inspect_ctids);
1910 
1911 	if (exclaves_inspection_is_initialized() && exclave_threads_max) {
1912 		if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
1913 			error = KERN_INVALID_ARGUMENT;
1914 			goto error;
1915 		}
1916 		stackshot_exclave_inspect_ctids = stackshot_alloc_with_size(waitlist_size, &error);
1917 		if (!stackshot_exclave_inspect_ctids) {
1918 			goto error;
1919 		}
1920 		stackshot_exclave_inspect_ctid_count = 0;
1921 		stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
1922 	}
1923 
1924 error:
1925 	return error;
1926 }
1927 
1928 static kern_return_t
collect_exclave_threads(uint64_t ss_flags)1929 collect_exclave_threads(uint64_t ss_flags)
1930 {
1931 	size_t i;
1932 	ctid_t ctid;
1933 	thread_t thread;
1934 	kern_return_t kr = KERN_SUCCESS;
1935 	STACKSHOT_SUBSYS_ASSERT_LOCKED();
1936 
1937 	lck_mtx_lock(&exclaves_collect_mtx);
1938 
1939 	if (stackshot_exclave_inspect_ctid_count == 0) {
1940 		/* Nothing to do */
1941 		goto out;
1942 	}
1943 
1944 	// When asking for ASIDs, make sure we get all exclaves asids and mappings as well
1945 	exclaves_stackshot_raw_addresses = (ss_flags & STACKSHOT_ASID);
1946 	exclaves_stackshot_all_address_spaces = (ss_flags & (STACKSHOT_ASID | STACKSHOT_EXCLAVES));
1947 
1948 	/* This error is intentionally ignored: we are now committed to collecting
1949 	 * these threads, or at least properly waking them. If this fails, the first
1950 	 * collected thread should also fail to append to the kcdata, and will abort
1951 	 * further collection, properly clearing the AST and waking these threads.
1952 	 */
1953 	kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_BEGIN,
1954 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1955 
1956 	for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
1957 		ctid = stackshot_exclave_inspect_ctids[i];
1958 		thread = ctid_get_thread(ctid);
1959 		assert(thread);
1960 		exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
1961 	}
1962 	exclaves_inspection_begin_collecting();
1963 	exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
1964 	kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
1965 	if (kr != KERN_SUCCESS) {
1966 		goto out;
1967 	}
1968 
1969 	kr = kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_END,
1970 	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1971 	if (kr != KERN_SUCCESS) {
1972 		goto out;
1973 	}
1974 out:
1975 	/* clear Exclave buffer now that it's been used */
1976 	stackshot_exclave_inspect_ctids = NULL;
1977 	stackshot_exclave_inspect_ctid_capacity = 0;
1978 	stackshot_exclave_inspect_ctid_count = 0;
1979 
1980 	lck_mtx_unlock(&exclaves_collect_mtx);
1981 	return kr;
1982 }
1983 
1984 static kern_return_t
stackshot_exclaves_process_stacktrace(const address_v__opt_s * _Nonnull st,void * kcdata_ptr)1985 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
1986 {
1987 	kern_return_t error = KERN_SUCCESS;
1988 	exclave_ecstackentry_addr_t * addr = NULL;
1989 	__block size_t count = 0;
1990 
1991 	if (!st->has_value) {
1992 		goto error_exit;
1993 	}
1994 
1995 	address__v_visit(&st->value, ^(size_t __unused i, const stackshottypes_address_s __unused item) {
1996 		count++;
1997 	});
1998 
1999 	kcdata_compression_window_open(kcdata_ptr);
2000 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
2001 	    sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
2002 
2003 	address__v_visit(&st->value, ^(size_t i, const stackshottypes_address_s item) {
2004 		addr[i] = (exclave_ecstackentry_addr_t)item;
2005 	});
2006 
2007 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2008 
2009 error_exit:
2010 	return error;
2011 }
2012 
2013 static kern_return_t
stackshot_exclaves_process_ipcstackentry(uint64_t index,const stackshottypes_ipcstackentry_s * _Nonnull ise,void * kcdata_ptr)2014 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshottypes_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
2015 {
2016 	kern_return_t error = KERN_SUCCESS;
2017 
2018 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2019 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2020 
2021 	struct exclave_ipcstackentry_info info = { 0 };
2022 	info.eise_asid = ise->asid;
2023 
2024 	info.eise_tnid = ise->tnid;
2025 
2026 	if (ise->invocationid.has_value) {
2027 		info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
2028 		info.eise_invocationid = ise->invocationid.value;
2029 	} else {
2030 		info.eise_invocationid = 0;
2031 	}
2032 
2033 	info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
2034 
2035 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
2036 
2037 	if (ise->stacktrace.has_value) {
2038 		kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
2039 	}
2040 
2041 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2042 	    STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2043 
2044 error_exit:
2045 	return error;
2046 }
2047 
2048 static kern_return_t
stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s * _Nonnull ipcstack,void * kcdata_ptr)2049 stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
2050 {
2051 	__block kern_return_t kr = KERN_SUCCESS;
2052 
2053 	if (!ipcstack->has_value) {
2054 		goto error_exit;
2055 	}
2056 
2057 	stackshottypes_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshottypes_ipcstackentry_s *_Nonnull item) {
2058 		if (kr == KERN_SUCCESS) {
2059 		        kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
2060 		}
2061 	});
2062 
2063 error_exit:
2064 	return kr;
2065 }
2066 
2067 static kern_return_t
stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s * _Nonnull se,void * kcdata_ptr)2068 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
2069 {
2070 	kern_return_t error = KERN_SUCCESS;
2071 
2072 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2073 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2074 
2075 	struct exclave_scresult_info info = { 0 };
2076 	info.esc_id = se->scid;
2077 	info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
2078 
2079 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
2080 
2081 	if (se->ipcstack.has_value) {
2082 		kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
2083 	}
2084 
2085 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2086 	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2087 
2088 error_exit:
2089 	return error;
2090 }
2091 
2092 static kern_return_t
stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)2093 stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2094 {
2095 	kern_return_t error = KERN_SUCCESS;
2096 	__block struct exclave_textlayout_segment * info = NULL;
2097 
2098 	__block size_t count = 0;
2099 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s __unused *_Nonnull item) {
2100 		count++;
2101 	});
2102 
2103 	if (!count) {
2104 		goto error_exit;
2105 	}
2106 
2107 	kcdata_compression_window_open(kcdata_ptr);
2108 	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
2109 	    sizeof(struct exclave_textlayout_segment), count, (mach_vm_address_t*)&info));
2110 
2111 	stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s *_Nonnull item) {
2112 		memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
2113 		if (want_raw_addresses) {
2114 		        info->layoutSegment_loadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
2115 		} else {
2116 		        info->layoutSegment_loadAddress = item->loadaddress;
2117 		}
2118 		info++;
2119 	});
2120 
2121 	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2122 
2123 error_exit:
2124 	return error;
2125 }
2126 
2127 static kern_return_t
stackshot_exclaves_process_textlayout(uint64_t index,const stackshottypes_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)2128 stackshot_exclaves_process_textlayout(uint64_t index, const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2129 {
2130 	kern_return_t error = KERN_SUCCESS;
2131 	__block struct exclave_textlayout_info info = { 0 };
2132 
2133 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2134 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
2135 
2136 	info.layout_id = tl->textlayoutid;
2137 
2138 	info.etl_flags = want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid;
2139 
2140 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
2141 	kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
2142 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2143 	    STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
2144 error_exit:
2145 	return error;
2146 }
2147 
2148 static kern_return_t
stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s * _Nonnull as,void * kcdata_ptr,bool want_raw_addresses)2149 stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
2150 {
2151 	kern_return_t error = KERN_SUCCESS;
2152 	struct exclave_addressspace_info info = { 0 };
2153 	__block size_t name_len = 0;
2154 	uint8_t * name = NULL;
2155 
2156 	u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
2157 		name_len++;
2158 	});
2159 
2160 	info.eas_id = as->asid;
2161 
2162 	if (want_raw_addresses && as->rawaddressslide.has_value) {
2163 		info.eas_flags = kExclaveAddressSpaceHaveSlide;
2164 		info.eas_slide = as->rawaddressslide.value;
2165 	} else {
2166 		info.eas_flags = 0;
2167 		info.eas_slide = UINT64_MAX;
2168 	}
2169 
2170 	info.eas_layoutid = as->textlayoutid; // text layout for this address space
2171 	info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
2172 
2173 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2174 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2175 	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
2176 
2177 	if (name_len > 0) {
2178 		kcdata_compression_window_open(kcdata_ptr);
2179 		kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
2180 
2181 		u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
2182 			name[i] = item;
2183 		});
2184 		name[name_len] = 0;
2185 
2186 		kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2187 	}
2188 
2189 	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2190 	    STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2191 error_exit:
2192 	return error;
2193 }
2194 
2195 kern_return_t
2196 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
2197 
2198 kern_return_t
stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s * result,void * kcdata_ptr,bool want_raw_addresses)2199 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
2200 {
2201 	__block kern_return_t kr = KERN_SUCCESS;
2202 
2203 	stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
2204 		if (kr == KERN_SUCCESS) {
2205 		        kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
2206 		}
2207 	});
2208 
2209 	stackshottypes_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshottypes_addressspace_s *_Nonnull item) {
2210 		if (kr == KERN_SUCCESS) {
2211 		        kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
2212 		}
2213 	});
2214 
2215 	stackshottypes_textlayout__v_visit(&result->textlayouts, ^(size_t i, const stackshottypes_textlayout_s *_Nonnull item) {
2216 		if (kr == KERN_SUCCESS) {
2217 		        kr = stackshot_exclaves_process_textlayout(i, item, kcdata_ptr, want_raw_addresses);
2218 		}
2219 	});
2220 
2221 	return kr;
2222 }
2223 
2224 kern_return_t
2225 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
2226 
2227 kern_return_t
stackshot_exclaves_process_result(kern_return_t collect_kr,const stackshot_stackshotresult_s * result,bool want_raw_addresses)2228 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
2229 {
2230 	kern_return_t kr = KERN_SUCCESS;
2231 	if (result == NULL) {
2232 		return collect_kr;
2233 	}
2234 
2235 	kr = stackshot_exclaves_process_stackshot(result, stackshot_ctx.sc_finalized_kcdata, want_raw_addresses);
2236 
2237 	stackshot_exclave_kr = kr;
2238 
2239 	return kr;
2240 }
2241 
2242 
2243 static void
commit_exclaves_ast(void)2244 commit_exclaves_ast(void)
2245 {
2246 	size_t i = 0;
2247 	thread_t thread = NULL;
2248 	size_t count;
2249 
2250 	assert(debug_mode_active());
2251 
2252 	count = os_atomic_load(&stackshot_exclave_inspect_ctid_count, acquire);
2253 
2254 	if (stackshot_exclave_inspect_ctids) {
2255 		for (i = 0; i < count; ++i) {
2256 			thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
2257 			assert(thread);
2258 			thread_reference(thread);
2259 			os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
2260 		}
2261 	}
2262 }
2263 
2264 #endif /* CONFIG_EXCLAVES */
2265 
2266 kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version,void * stackshot_config,size_t stackshot_config_size,boolean_t stackshot_from_user)2267 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
2268 {
2269 	int error = 0;
2270 	boolean_t prev_interrupt_state;
2271 	bool did_copyout = false;
2272 	uint32_t bytes_traced = 0;
2273 	uint32_t stackshot_estimate = 0;
2274 	struct kdp_snapshot_args snapshot_args;
2275 
2276 	void * buf_to_free = NULL;
2277 	int size_to_free = 0;
2278 	bool is_traced = false;    /* has FUNC_START tracepoint fired? */
2279 	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
2280 
2281 	/* Parsed arguments */
2282 	uint64_t                out_buffer_addr;
2283 	uint64_t                out_size_addr;
2284 	uint32_t                size_hint = 0;
2285 
2286 	snapshot_args.pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
2287 
2288 	if (stackshot_config == NULL) {
2289 		return KERN_INVALID_ARGUMENT;
2290 	}
2291 #if DEVELOPMENT || DEBUG
2292 	/* TBD: ask stackshot clients to avoid issuing stackshots in this
2293 	 * configuration in lieu of the kernel feature override.
2294 	 */
2295 	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
2296 		return KERN_NOT_SUPPORTED;
2297 	}
2298 #endif
2299 
2300 	switch (stackshot_config_version) {
2301 	case STACKSHOT_CONFIG_TYPE:
2302 		if (stackshot_config_size != sizeof(stackshot_config_t)) {
2303 			return KERN_INVALID_ARGUMENT;
2304 		}
2305 		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
2306 		out_buffer_addr = config->sc_out_buffer_addr;
2307 		out_size_addr = config->sc_out_size_addr;
2308 		snapshot_args.pid = config->sc_pid;
2309 		snapshot_args.flags = config->sc_flags;
2310 		snapshot_args.since_timestamp = config->sc_delta_timestamp;
2311 		if (config->sc_size <= max_tracebuf_size) {
2312 			size_hint = config->sc_size;
2313 		}
2314 		/*
2315 		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
2316 		 * dump every level if the pagetable_mask is not set
2317 		 */
2318 		if (snapshot_args.flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
2319 			snapshot_args.pagetable_mask = config->sc_pagetable_mask;
2320 		}
2321 		break;
2322 	default:
2323 		return KERN_NOT_SUPPORTED;
2324 	}
2325 
2326 	/*
2327 	 * Currently saving a kernel buffer and trylock are only supported from the
2328 	 * internal/KEXT API.
2329 	 */
2330 	if (stackshot_from_user) {
2331 		if (snapshot_args.flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
2332 			return KERN_NO_ACCESS;
2333 		}
2334 #if !DEVELOPMENT && !DEBUG
2335 		if (snapshot_args.flags & (STACKSHOT_DO_COMPRESS)) {
2336 			return KERN_NO_ACCESS;
2337 		}
2338 #endif
2339 	} else {
2340 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2341 			return KERN_NOT_SUPPORTED;
2342 		}
2343 	}
2344 
2345 	if (!((snapshot_args.flags & STACKSHOT_KCDATA_FORMAT) || (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
2346 		return KERN_NOT_SUPPORTED;
2347 	}
2348 
2349 	/* Compressed delta stackshots or page dumps are not yet supported */
2350 	if (((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (snapshot_args.flags & STACKSHOT_PAGE_TABLES))
2351 	    && (snapshot_args.flags & STACKSHOT_DO_COMPRESS)) {
2352 		return KERN_NOT_SUPPORTED;
2353 	}
2354 
2355 	/*
2356 	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
2357 	 */
2358 	if ((!out_buffer_addr || !out_size_addr) && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2359 		return KERN_INVALID_ARGUMENT;
2360 	}
2361 
2362 	if (snapshot_args.since_timestamp != 0 && ((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
2363 		return KERN_INVALID_ARGUMENT;
2364 	}
2365 
2366 	/* EXCLAVES and SKIP_EXCLAVES conflict */
2367 	if ((snapshot_args.flags & (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) == (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) {
2368 		return KERN_INVALID_ARGUMENT;
2369 	}
2370 
2371 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
2372 	if (!mt_core_supported) {
2373 		snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2374 	}
2375 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
2376 	snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2377 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
2378 
2379 	STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
2380 	STACKSHOT_SUBSYS_LOCK();
2381 
2382 	stackshot_tries = 0;
2383 
2384 	if (snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
2385 		/*
2386 		 * Don't overwrite an existing stackshot
2387 		 */
2388 		if (kernel_stackshot_buf != NULL) {
2389 			error = KERN_MEMORY_PRESENT;
2390 			goto error_early_exit;
2391 		}
2392 	} else if (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
2393 		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
2394 			error = KERN_NOT_IN_SET;
2395 			goto error_early_exit;
2396 		}
2397 		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
2398 		    out_buffer_addr, out_size_addr);
2399 		/*
2400 		 * If we successfully remapped the buffer into the user's address space, we
2401 		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
2402 		 * and then clear the kernel stackshot pointer and associated size.
2403 		 */
2404 		if (error == KERN_SUCCESS) {
2405 			did_copyout = true;
2406 			buf_to_free = kernel_stackshot_buf;
2407 			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
2408 			kernel_stackshot_buf = NULL;
2409 			kernel_stackshot_buf_size = 0;
2410 		}
2411 
2412 		goto error_early_exit;
2413 	}
2414 
2415 	if (snapshot_args.flags & STACKSHOT_GET_BOOT_PROFILE) {
2416 		void *bootprofile = NULL;
2417 		uint32_t len = 0;
2418 #if CONFIG_TELEMETRY
2419 		bootprofile_get(&bootprofile, &len);
2420 #endif
2421 		if (!bootprofile || !len) {
2422 			error = KERN_NOT_IN_SET;
2423 			goto error_early_exit;
2424 		}
2425 		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
2426 		if (error == KERN_SUCCESS) {
2427 			did_copyout = true;
2428 		}
2429 		goto error_early_exit;
2430 	}
2431 
2432 	stackshot_duration_prior_abs = 0;
2433 	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
2434 	snapshot_args.buffer_size = stackshot_estimate =
2435 	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj, snapshot_args.flags, snapshot_args.pid);
2436 	stackshot_initial_estimate = stackshot_estimate;
2437 
2438 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
2439 	    snapshot_args.flags, snapshot_args.buffer_size, snapshot_args.pid, snapshot_args.since_timestamp);
2440 	is_traced = true;
2441 
2442 #if CONFIG_EXCLAVES
2443 	assert(!stackshot_exclave_inspect_ctids);
2444 #endif
2445 
2446 	for (; snapshot_args.buffer_size <= max_tracebuf_size; snapshot_args.buffer_size = MIN(snapshot_args.buffer_size << 1, max_tracebuf_size)) {
2447 		stackshot_tries++;
2448 		if ((error = kmem_alloc(kernel_map, (vm_offset_t *)&snapshot_args.buffer, snapshot_args.buffer_size,
2449 		    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
2450 			os_log_error(OS_LOG_DEFAULT, "stackshot: initial allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, snapshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
2451 			error = KERN_RESOURCE_SHORTAGE;
2452 			goto error_exit;
2453 		}
2454 
2455 		uint32_t hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2456 		    : (snapshot_args.flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2457 		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2458 		#pragma unused(hdr_tag)
2459 
2460 		stackshot_duration_outer = NULL;
2461 
2462 		/* if compression was requested, allocate the extra zlib scratch area */
2463 		if (snapshot_args.flags & STACKSHOT_DO_COMPRESS) {
2464 			hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2465 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2466 			if (error != KERN_SUCCESS) {
2467 				os_log_error(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
2468 				    (int) error);
2469 				goto error_exit;
2470 			}
2471 		}
2472 
2473 		/* Prepare the compressor for a stackshot */
2474 		error = vm_compressor_kdp_init();
2475 		if (error != KERN_SUCCESS) {
2476 			goto error_exit;
2477 		}
2478 
2479 		/*
2480 		 * Disable interrupts and save the current interrupt state.
2481 		 */
2482 		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
2483 		uint64_t time_start  = mach_absolute_time();
2484 
2485 		/* Emit a SOCD tracepoint that we are initiating a stackshot */
2486 		SOCD_TRACE_XNU_START(STACKSHOT);
2487 
2488 		/*
2489 		 * Load stackshot parameters.
2490 		 */
2491 		error = kdp_snapshot_preflight_internal(snapshot_args);
2492 
2493 		if (error == KERN_SUCCESS) {
2494 			error = stackshot_trap();
2495 		}
2496 
2497 		/* Emit a SOCD tracepoint that we have completed the stackshot */
2498 		SOCD_TRACE_XNU_END(STACKSHOT);
2499 		ml_set_interrupts_enabled(prev_interrupt_state);
2500 
2501 #if CONFIG_EXCLAVES
2502 		/* stackshot trap should only finish successfully or with no pending Exclave threads */
2503 		assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
2504 #endif
2505 
2506 		/*
2507 		 * Stackshot is no longer active.
2508 		 * (We have to do this here for the special interrupt disable timeout case to work)
2509 		 */
2510 		os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
2511 
2512 		/* Release compressor kdp buffers */
2513 		vm_compressor_kdp_teardown();
2514 
2515 		/* Record duration that interrupts were disabled */
2516 		uint64_t time_end = mach_absolute_time();
2517 		tot_interrupts_off_abs += (time_end - time_start);
2518 
2519 		/* Collect multithreaded kcdata into one finalized buffer */
2520 		if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
2521 			error = stackshot_collect_kcdata();
2522 		}
2523 
2524 #if CONFIG_EXCLAVES
2525 		if (stackshot_exclave_inspect_ctids) {
2526 			if (stackshot_exclave_inspect_ctid_count > 0) {
2527 				STACKSHOT_TESTPOINT(TP_START_COLLECTION);
2528 			}
2529 			error = collect_exclave_threads(snapshot_args.flags);
2530 		}
2531 #endif /* CONFIG_EXCLAVES */
2532 
2533 		if (error == KERN_SUCCESS) {
2534 			if (stackshot_ctx.sc_is_singlethreaded) {
2535 				error = stackshot_finalize_singlethreaded_kcdata();
2536 			} else {
2537 				error = stackshot_finalize_kcdata();
2538 			}
2539 
2540 			if ((error != KERN_SUCCESS) && (error != KERN_INSUFFICIENT_BUFFER_SIZE)) {
2541 				goto error_exit;
2542 			}
2543 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE && snapshot_args.buffer_size == max_tracebuf_size) {
2544 				os_log_error(OS_LOG_DEFAULT, "stackshot: final buffer size was insufficient at maximum size\n");
2545 				error = KERN_RESOURCE_SHORTAGE;
2546 				goto error_exit;
2547 			}
2548 		}
2549 
2550 		/* record the duration that interrupts were disabled + kcdata was being finalized */
2551 		if (stackshot_duration_outer) {
2552 			*stackshot_duration_outer = mach_absolute_time() - time_start;
2553 		}
2554 
2555 		if (error != KERN_SUCCESS) {
2556 			os_log_error(OS_LOG_DEFAULT, "stackshot: debugger call failed: %d, try %llu, buffer %u estimate %u\n", (int)error, stackshot_tries, snapshot_args.buffer_size, stackshot_estimate);
2557 			kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2558 			snapshot_args.buffer = NULL;
2559 			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
2560 				/*
2561 				 * If we didn't allocate a big enough buffer, deallocate and try again.
2562 				 */
2563 				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
2564 				    time_end - time_start, stackshot_estimate, snapshot_args.buffer_size);
2565 				stackshot_duration_prior_abs += (time_end - time_start);
2566 				if (snapshot_args.buffer_size == max_tracebuf_size) {
2567 					os_log_error(OS_LOG_DEFAULT, "stackshot: initial buffer size was insufficient at maximum size\n");
2568 					error = KERN_RESOURCE_SHORTAGE;
2569 					goto error_exit;
2570 				}
2571 				continue;
2572 			} else {
2573 				goto error_exit;
2574 			}
2575 		}
2576 
2577 		bytes_traced = kdp_stack_snapshot_bytes_traced();
2578 		if (bytes_traced <= 0) {
2579 			error = KERN_ABORTED;
2580 			goto error_exit;
2581 		}
2582 
2583 		if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2584 			error = stackshot_remap_buffer(snapshot_args.buffer, bytes_traced, out_buffer_addr, out_size_addr);
2585 			if (error == KERN_SUCCESS) {
2586 				did_copyout = true;
2587 			}
2588 			goto error_exit;
2589 		}
2590 
2591 		if (!(snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT)) {
2592 			os_log_info(OS_LOG_DEFAULT, "stackshot: succeeded, traced %u bytes to %u buffer (estimate %u) try %llu\n", bytes_traced, snapshot_args.buffer_size, stackshot_estimate, stackshot_tries);
2593 		}
2594 
2595 		/*
2596 		 * Save the stackshot in the kernel buffer.
2597 		 */
2598 		kernel_stackshot_buf = snapshot_args.buffer;
2599 		kernel_stackshot_buf_size =  bytes_traced;
2600 		/*
2601 		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
2602 		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
2603 		 * update size_to_free for kmem_free accordingly.
2604 		 */
2605 		size_to_free = snapshot_args.buffer_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
2606 
2607 		assert(size_to_free >= 0);
2608 
2609 		if (size_to_free != 0) {
2610 			buf_to_free = (void *)((uint64_t)snapshot_args.buffer + snapshot_args.buffer_size - size_to_free);
2611 		}
2612 
2613 		snapshot_args.buffer = NULL;
2614 		snapshot_args.buffer_size = 0;
2615 		goto error_exit;
2616 	}
2617 
2618 error_exit:
2619 	if (is_traced) {
2620 		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
2621 		    error, tot_interrupts_off_abs, snapshot_args.buffer_size, bytes_traced);
2622 	}
2623 
2624 error_early_exit:
2625 	if (snapshot_args.buffer != NULL) {
2626 		kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2627 	}
2628 	if (buf_to_free != NULL) {
2629 		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
2630 	}
2631 
2632 	if (error == KERN_SUCCESS && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) && !did_copyout) {
2633 		/* If we return success, we must have done the copyout to userspace. If
2634 		 * we somehow did not, we need to indicate failure instead.
2635 		 */
2636 #if DEVELOPMENT || DEBUG
2637 		os_log_error(OS_LOG_DEFAULT, "stackshot: reached end without doing copyout\n");
2638 #endif // DEVELOPMENT || DEBUG
2639 		error = KERN_FAILURE;
2640 	}
2641 
2642 	STACKSHOT_SUBSYS_UNLOCK();
2643 	STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
2644 
2645 	return error;
2646 }
2647 
2648 /*
2649  * Set up state and parameters for a stackshot.
2650  * (This runs on the calling CPU before other CPUs enter the debugger trap.)
2651  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2652  */
2653 __result_use_check
2654 static kern_return_t
kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)2655 kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)
2656 {
2657 	kern_return_t error = KERN_SUCCESS;
2658 	uint64_t microsecs = 0, secs = 0;
2659 	bool is_panic = ((args.flags & STACKSHOT_FROM_PANIC) != 0);
2660 	bool process_scoped = (stackshot_args.pid != -1) &&
2661 	    ((stackshot_args.flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
2662 	bool is_singlethreaded = stackshot_single_thread || (process_scoped || is_panic || ((args.flags & STACKSHOT_PAGE_TABLES) != 0));
2663 	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
2664 
2665 	cur_stackshot_ctx_idx = (is_panic ? STACKSHOT_CTX_IDX_PANIC : STACKSHOT_CTX_IDX_NORMAL);
2666 
2667 	/* Setup overall state */
2668 	stackshot_ctx = (struct stackshot_context) {
2669 		.sc_args               = args,
2670 		.sc_state              = SS_SETUP,
2671 		.sc_bytes_traced       = 0,
2672 		.sc_bytes_uncompressed = 0,
2673 		.sc_microsecs          = microsecs + (secs * USEC_PER_SEC),
2674 		.sc_panic_stackshot    = is_panic,
2675 		.sc_is_singlethreaded  = is_singlethreaded,
2676 		.sc_cpus_working       = 0,
2677 		.sc_retval             = 0,
2678 		.sc_calling_cpuid      = cpu_number(),
2679 		.sc_main_cpuid         = is_singlethreaded ? cpu_number() : -1,
2680 		.sc_min_kcdata_size    = get_stackshot_est_tasksize(args.flags),
2681 		.sc_enable_faulting    = false,
2682 	};
2683 
2684 	if (!stackshot_ctx.sc_panic_stackshot) {
2685 #if defined(__AMP__)
2686 		/* On AMP systems, we want to split the buffers up by cluster to avoid cache line effects. */
2687 		stackshot_ctx.sc_num_buffers = is_singlethreaded ? 1 : ml_get_cluster_count();
2688 #else /* __AMP__ */
2689 		stackshot_ctx.sc_num_buffers = 1;
2690 #endif /* !__AMP__ */
2691 		size_t bufsz = args.buffer_size / stackshot_ctx.sc_num_buffers;
2692 		for (int buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
2693 			stackshot_ctx.sc_buffers[buf_idx] = (struct stackshot_buffer) {
2694 				.ssb_ptr = (void*) ((mach_vm_address_t) args.buffer + (bufsz * buf_idx)),
2695 				.ssb_size = bufsz,
2696 				.ssb_used = 0,
2697 				.ssb_freelist = NULL,
2698 				.ssb_freelist_lock = 0,
2699 				.ssb_overhead = 0
2700 			};
2701 		}
2702 
2703 		/* Setup per-cpu state */
2704 		percpu_foreach_base(base) {
2705 			*PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu) = (struct stackshot_cpu_context) { 0 };
2706 		}
2707 
2708 		if (is_singlethreaded) {
2709 			/* If the stackshot is singlethreaded, set up the kcdata - we don't bother with linked-list kcdata in singlethreaded mode. */
2710 			uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2711 			    : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2712 			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2713 			kcdata_memory_static_init(stackshot_kcdata_p, (mach_vm_address_t) stackshot_args.buffer, hdr_tag,
2714 			    stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
2715 			if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
2716 				hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2717 				    : KCDATA_BUFFER_BEGIN_STACKSHOT;
2718 				kcd_exit_on_error(kcdata_init_compress(stackshot_kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
2719 			}
2720 			stackshot_cpu_ctx.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES);
2721 		}
2722 	} else {
2723 		/*
2724 		 * If this is a panic stackshot, we need to handle things differently.
2725 		 * The panic code hands us a kcdata descriptor to work with instead of
2726 		 * us making one ourselves.
2727 		 */
2728 		*stackshot_kcdata_p = *stackshot_args.descriptor;
2729 		stackshot_cpu_ctx = (struct stackshot_cpu_context) {
2730 			.scc_can_work = true,
2731 			.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES)
2732 		};
2733 #if STACKSHOT_COLLECTS_LATENCY_INFO
2734 		*(PERCPU_GET(stackshot_trace_buffer)) = (struct stackshot_trace_buffer) {};
2735 #endif
2736 	}
2737 
2738 	/* Set up our cpu state */
2739 	stackshot_cpu_preflight();
2740 
2741 error_exit:
2742 	return error;
2743 }
2744 
2745 /*
2746  * The old function signature for kdp_snapshot_preflight, used in the panic path.
2747  * Called when interrupts are disabled, but we're not in the debugger trap yet.
2748  */
2749 void
kdp_snapshot_preflight(int pid,void * tracebuf,uint32_t tracebuf_size,uint64_t flags,kcdata_descriptor_t data_p,uint64_t since_timestamp,uint32_t pagetable_mask)2750 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
2751     kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
2752 {
2753 	__assert_only kern_return_t err;
2754 	err = kdp_snapshot_preflight_internal((struct kdp_snapshot_args) {
2755 		.pid = pid,
2756 		.buffer = tracebuf,
2757 		.buffer_size = tracebuf_size,
2758 		.flags = flags,
2759 		.descriptor = data_p,
2760 		.since_timestamp = since_timestamp,
2761 		.pagetable_mask = pagetable_mask
2762 	});
2763 
2764 
2765 	/* This shouldn't ever return an error in the panic path. */
2766 	assert(err == KERN_SUCCESS);
2767 }
2768 
2769 static void
stackshot_reset_state(void)2770 stackshot_reset_state(void)
2771 {
2772 	stackshot_ctx = (struct stackshot_context) { 0 };
2773 }
2774 
/*
 * Externally visible entry point that clears the stackshot context by
 * delegating to stackshot_reset_state().
 */
void
panic_stackshot_reset_state(void)
{
	stackshot_reset_state();
}
2780 
2781 boolean_t
stackshot_active(void)2782 stackshot_active(void)
2783 {
2784 	return os_atomic_load(&stackshot_ctx.sc_state, relaxed) != SS_INACTIVE;
2785 }
2786 
2787 boolean_t
panic_stackshot_active(void)2788 panic_stackshot_active(void)
2789 {
2790 	return os_atomic_load(&stackshot_contexts[STACKSHOT_CTX_IDX_PANIC].sc_state, relaxed) != SS_INACTIVE;
2791 }
2792 
2793 uint32_t
kdp_stack_snapshot_bytes_traced(void)2794 kdp_stack_snapshot_bytes_traced(void)
2795 {
2796 	return stackshot_ctx.sc_bytes_traced;
2797 }
2798 
2799 uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)2800 kdp_stack_snapshot_bytes_uncompressed(void)
2801 {
2802 	return stackshot_ctx.sc_bytes_uncompressed;
2803 }
2804 
/*
 * Return TRUE when every one of the `size` bytes starting at `addr` is zero
 * (trivially TRUE for size == 0), FALSE as soon as a non-zero byte is seen.
 */
static boolean_t
memory_iszero(void *addr, size_t size)
{
	const char *cursor = (const char *)addr;
	const char *const end = cursor + size;

	while (cursor != end) {
		if (*cursor != 0) {
			return FALSE;
		}
		cursor++;
	}
	return TRUE;
}
2816 
2817 static void
_stackshot_validation_reset(void)2818 _stackshot_validation_reset(void)
2819 {
2820 	percpu_foreach_base(base) {
2821 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2822 		cpu_ctx->scc_validation_state.last_valid_page_kva = -1;
2823 		cpu_ctx->scc_validation_state.last_valid_size = 0;
2824 	}
2825 }
2826 
2827 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)2828 _stackshot_validate_kva(vm_offset_t addr, size_t size)
2829 {
2830 	vm_offset_t page_addr = atop_kernel(addr);
2831 	if (stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva == page_addr &&
2832 	    stackshot_cpu_ctx.scc_validation_state.last_valid_size <= size) {
2833 		return true;
2834 	}
2835 
2836 	if (ml_validate_nofault(addr, size)) {
2837 		stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva = page_addr;
2838 		stackshot_cpu_ctx.scc_validation_state.last_valid_size = size;
2839 		return true;
2840 	}
2841 	return false;
2842 }
2843 
2844 static long
_stackshot_strlen(const char * s,size_t maxlen)2845 _stackshot_strlen(const char *s, size_t maxlen)
2846 {
2847 	size_t len = 0;
2848 	for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
2849 		if (*s == 0) {
2850 			return len;
2851 		}
2852 		if (len >= maxlen) {
2853 			return -1;
2854 		}
2855 	}
2856 	return -1; /* failed before end of string */
2857 }
2858 
2859 
2860 static size_t
stackshot_plh_est_size(void)2861 stackshot_plh_est_size(void)
2862 {
2863 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2864 	size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
2865 
2866 	if (size == 0) {
2867 		return 0;
2868 	}
2869 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
2870 	return SIZE_EST(size * sizeof(*plh->plh_array)) +
2871 	       SIZE_EST(size * sizeof(*plh->plh_chains)) +
2872 	       SIZE_EST(size * sizeof(*stackshot_cpu_ctx.scc_plh_gen.pgs_gen) * real_ncpus) +
2873 	       SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
2874 #undef SIZE_EST
2875 }
2876 
2877 static void
stackshot_plh_reset(void)2878 stackshot_plh_reset(void)
2879 {
2880 	stackshot_ctx.sc_plh = (struct port_label_hash){.plh_size = 0};  /* structure assignment */
2881 }
2882 
2883 static kern_return_t
stackshot_plh_setup(void)2884 stackshot_plh_setup(void)
2885 {
2886 	kern_return_t error;
2887 	size_t size;
2888 	bool percpu_alloc_failed = false;
2889 	struct port_label_hash plh = {
2890 		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
2891 		.plh_count = 0,
2892 	};
2893 
2894 	stackshot_plh_reset();
2895 
2896 	percpu_foreach_base(base) {
2897 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2898 		cpu_ctx->scc_plh_gen = (struct _stackshot_plh_gen_state){
2899 			.pgs_gen = NULL,
2900 			.pgs_curgen = 1,
2901 			.pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX,
2902 			.pgs_curgen_max = 0,
2903 		};
2904 	}
2905 
2906 	size = plh.plh_size;
2907 	if (size == 0) {
2908 		return KERN_SUCCESS;
2909 	}
2910 	plh.plh_array = stackshot_alloc_with_size(size * sizeof(*plh.plh_array), &error);
2911 	plh.plh_chains = stackshot_alloc_with_size(size * sizeof(*plh.plh_chains), &error);
2912 	percpu_foreach_base(base) {
2913 		struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2914 		cpu_ctx->scc_plh_gen.pgs_gen = stackshot_alloc_with_size(size * sizeof(*cpu_ctx->scc_plh_gen.pgs_gen), &error);
2915 		if (cpu_ctx->scc_plh_gen.pgs_gen == NULL) {
2916 			percpu_alloc_failed = true;
2917 			break;
2918 		}
2919 		for (int x = 0; x < size; x++) {
2920 			cpu_ctx->scc_plh_gen.pgs_gen[x] = 0;
2921 		}
2922 	}
2923 	plh.plh_hash = stackshot_alloc_with_size((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash), &error);
2924 	if (error != KERN_SUCCESS) {
2925 		return error;
2926 	}
2927 	if (plh.plh_array == NULL || plh.plh_chains == NULL || percpu_alloc_failed || plh.plh_hash == NULL) {
2928 		PLH_STAT_OP(os_atomic_inc(&stackshot_ctx.sc_plh.plh_bad, relaxed));
2929 		return KERN_SUCCESS;
2930 	}
2931 	for (int x = 0; x < size; x++) {
2932 		plh.plh_array[x] = NULL;
2933 		plh.plh_chains[x] = -1;
2934 	}
2935 	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
2936 		plh.plh_hash[x] = -1;
2937 	}
2938 	stackshot_ctx.sc_plh = plh;  /* structure assignment */
2939 	return KERN_SUCCESS;
2940 }
2941 
/*
 * Hash a port label pointer down to STACKSHOT_PLH_SHIFT bits.
 *
 * The pointer value is XOR-folded onto itself with right shifts of
 * 16, 8, 4, 2 and 1 times STACKSHOT_PLH_SHIFT bits (shifts that would be at
 * least as wide as the pointer are skipped), and the low STACKSHOT_PLH_SHIFT
 * bits of the result are returned.
 */
static int16_t
stackshot_plh_hash(struct ipc_service_port_label *ispl)
{
	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
	static const uint8_t fold_steps[] = { 16, 8, 4, 2, 1 };
	uintptr_t folded = (uintptr_t)ispl;

	for (size_t i = 0; i < sizeof(fold_steps) / sizeof(fold_steps[0]); i++) {
		/* Only shift when the amount is smaller than the pointer width. */
		if ((fold_steps[i] * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) {
			folded ^= folded >> (fold_steps[i] * STACKSHOT_PLH_SHIFT);
		}
	}
	return (int16_t)(folded & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
}
2957 
/*
 * Kind of port right a port label lookup is being made for. The lookup code
 * uses it only to pick which statistics counter to bump.
 */
enum stackshot_plh_lookup_type {
	STACKSHOT_PLH_LOOKUP_UNKNOWN,   /* no per-type counter is updated */
	STACKSHOT_PLH_LOOKUP_SEND,      /* counted in plh_lookup_send */
	STACKSHOT_PLH_LOOKUP_RECEIVE,   /* counted in plh_lookup_receive */
};
2963 
2964 static void
stackshot_plh_resetgen(void)2965 stackshot_plh_resetgen(void)
2966 {
2967 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
2968 	uint16_t plh_size = stackshot_ctx.sc_plh.plh_size;
2969 
2970 	if (pgs->pgs_curgen_min == STACKSHOT_PLH_SIZE_MAX && pgs->pgs_curgen_max == 0) {
2971 		return;  // no lookups, nothing using the current generation
2972 	}
2973 	pgs->pgs_curgen++;
2974 	pgs->pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX;
2975 	pgs->pgs_curgen_max = 0;
2976 	if (pgs->pgs_curgen == 0) { // wrapped, zero the array and increment the generation
2977 		for (int x = 0; x < plh_size; x++) {
2978 			pgs->pgs_gen[x] = 0;
2979 		}
2980 		pgs->pgs_curgen = 1;
2981 	}
2982 }
2983 
2984 static int16_t
stackshot_plh_lookup_locked(struct ipc_service_port_label * ispl,enum stackshot_plh_lookup_type type)2985 stackshot_plh_lookup_locked(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
2986 {
2987 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2988 	int depth;
2989 	int16_t cur;
2990 	if (ispl == NULL) {
2991 		return STACKSHOT_PORTLABELID_NONE;
2992 	}
2993 	switch (type) {
2994 	case STACKSHOT_PLH_LOOKUP_SEND:
2995 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_send, relaxed));
2996 		break;
2997 	case STACKSHOT_PLH_LOOKUP_RECEIVE:
2998 		PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_receive, relaxed));
2999 		break;
3000 	default:
3001 		break;
3002 	}
3003 	PLH_STAT_OP(os_atomic_inc(&plh->plh_lookups, relaxed));
3004 	if (plh->plh_size == 0) {
3005 		return STACKSHOT_PORTLABELID_MISSING;
3006 	}
3007 	int16_t hash = stackshot_plh_hash(ispl);
3008 	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
3009 	depth = 0;
3010 	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
3011 		/* cur must be in-range, and chain depth can never be above our # allocated */
3012 		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
3013 			PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3014 			PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3015 			return STACKSHOT_PORTLABELID_MISSING;
3016 		}
3017 		assert(cur < plh->plh_count);
3018 		if (plh->plh_array[cur] == ispl) {
3019 			PLH_STAT_OP(os_atomic_inc(&plh->plh_found, relaxed));
3020 			PLH_STAT_OP(os_atomic_add(&plh->plh_found_depth, depth, relaxed));
3021 			goto found;
3022 		}
3023 		depth++;
3024 	}
3025 	/* not found in hash table, so alloc and insert it */
3026 	if (cur != -1) {
3027 		PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3028 		PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3029 		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
3030 	}
3031 	PLH_STAT_OP(os_atomic_inc(&plh->plh_insert, relaxed));
3032 	PLH_STAT_OP(os_atomic_add(&plh->plh_insert_depth, depth, relaxed));
3033 	if (plh->plh_count >= plh->plh_size) {
3034 		return STACKSHOT_PORTLABELID_MISSING; /* no space */
3035 	}
3036 	cur = plh->plh_count;
3037 	plh->plh_count++;
3038 	plh->plh_array[cur] = ispl;
3039 	plh->plh_chains[cur] = plh->plh_hash[hash];
3040 	plh->plh_hash[hash] = cur;
3041 found:  ;
3042 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3043 	pgs->pgs_gen[cur] = pgs->pgs_curgen;
3044 	if (pgs->pgs_curgen_min > cur) {
3045 		pgs->pgs_curgen_min = cur;
3046 	}
3047 	if (pgs->pgs_curgen_max < cur) {
3048 		pgs->pgs_curgen_max = cur;
3049 	}
3050 	return cur + 1;   /* offset to avoid 0 */
3051 }
3052 
3053 static kern_return_t
kdp_stackshot_plh_record_locked(void)3054 kdp_stackshot_plh_record_locked(void)
3055 {
3056 	kern_return_t error = KERN_SUCCESS;
3057 	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3058 	struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3059 	uint16_t count = plh->plh_count;
3060 	uint8_t curgen = pgs->pgs_curgen;
3061 	int16_t curgen_min = pgs->pgs_curgen_min;
3062 	int16_t curgen_max = pgs->pgs_curgen_max;
3063 	if (curgen_min <= curgen_max && curgen_max < count &&
3064 	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
3065 		struct ipc_service_port_label **arr = plh->plh_array;
3066 		size_t ispl_size, max_namelen;
3067 		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
3068 		for (int idx = curgen_min; idx <= curgen_max; idx++) {
3069 			struct ipc_service_port_label *ispl = arr[idx];
3070 			struct portlabel_info spl = {
3071 				.portlabel_id = (idx + 1),
3072 			};
3073 			const char *name = NULL;
3074 			long name_sz = 0;
3075 			if (pgs->pgs_gen[idx] != curgen) {
3076 				continue;
3077 			}
3078 			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
3079 				kdp_ipc_fill_splabel(ispl, &spl, &name);
3080 			}
3081 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3082 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3083 			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) {   /* validates the kva */
3084 				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
3085 			} else {
3086 				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
3087 			}
3088 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
3089 			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3090 			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3091 		}
3092 	}
3093 
3094 error_exit:
3095 	return error;
3096 }
3097 
3098 // record any PLH referenced since the last stackshot_plh_resetgen() call
3099 static kern_return_t
kdp_stackshot_plh_record(void)3100 kdp_stackshot_plh_record(void)
3101 {
3102 	kern_return_t error;
3103 	plh_lock(&stackshot_ctx.sc_plh);
3104 	error = kdp_stackshot_plh_record_locked();
3105 	plh_unlock(&stackshot_ctx.sc_plh);
3106 	return error;
3107 }
3108 
3109 static int16_t
stackshot_plh_lookup(struct ipc_service_port_label * ispl,enum stackshot_plh_lookup_type type)3110 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3111 {
3112 	int16_t result;
3113 	plh_lock(&stackshot_ctx.sc_plh);
3114 	result = stackshot_plh_lookup_locked(ispl, type);
3115 	plh_unlock(&stackshot_ctx.sc_plh);
3116 	return result;
3117 }
3118 
#if DEVELOPMENT || DEBUG
/*
 * Add the port label hash statistics to the stackshot kcdata.
 *
 * Each counter that is currently non-zero is written as a uint32 with the
 * description "stackshot_<field name>". Returns KERN_SUCCESS, or the status
 * from a failed kcdata operation (via kcd_exit_on_error).
 */
static kern_return_t
kdp_stackshot_plh_stats(void)
{
	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
	kern_return_t error = KERN_SUCCESS;

	/* Emit plh-><field> only when it is non-zero. */
#define PLH_EMIT_NONZERO(field) \
	do { \
	        if (os_atomic_load(&plh->field, relaxed) != 0) { \
	                kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, \
	                    os_atomic_load(&plh->field, relaxed), "stackshot_" #field)); \
	        } \
	} while (0)

	PLH_EMIT_NONZERO(plh_size);
	PLH_EMIT_NONZERO(plh_lookups);
	PLH_EMIT_NONZERO(plh_found);
	PLH_EMIT_NONZERO(plh_found_depth);
	PLH_EMIT_NONZERO(plh_insert);
	PLH_EMIT_NONZERO(plh_insert_depth);
	PLH_EMIT_NONZERO(plh_bad);
	PLH_EMIT_NONZERO(plh_bad_depth);
	PLH_EMIT_NONZERO(plh_lookup_send);
	PLH_EMIT_NONZERO(plh_lookup_receive);
#undef PLH_EMIT_NONZERO

error_exit:
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
3145 
3146 static uint64_t
kcdata_get_task_ss_flags(task_t task)3147 kcdata_get_task_ss_flags(task_t task)
3148 {
3149 	uint64_t ss_flags = 0;
3150 	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3151 	void *bsd_info = get_bsdtask_info(task);
3152 
3153 	if (task_64bit_addr) {
3154 		ss_flags |= kUser64_p;
3155 	}
3156 	if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
3157 		ss_flags |= kTerminatedSnapshot;
3158 	}
3159 	if (task->pidsuspended) {
3160 		ss_flags |= kPidSuspended;
3161 	}
3162 	if (task->frozen) {
3163 		ss_flags |= kFrozen;
3164 	}
3165 	if (task->effective_policy.tep_darwinbg == 1) {
3166 		ss_flags |= kTaskDarwinBG;
3167 	}
3168 	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
3169 		ss_flags |= kTaskIsForeground;
3170 	}
3171 	if (task->requested_policy.trp_boosted == 1) {
3172 		ss_flags |= kTaskIsBoosted;
3173 	}
3174 	if (task->effective_policy.tep_sup_active == 1) {
3175 		ss_flags |= kTaskIsSuppressed;
3176 	}
3177 #if CONFIG_MEMORYSTATUS
3178 
3179 	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
3180 	memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
3181 	if (dirty) {
3182 		ss_flags |= kTaskIsDirty;
3183 	}
3184 	if (dirty_tracked) {
3185 		ss_flags |= kTaskIsDirtyTracked;
3186 	}
3187 	if (allow_idle_exit) {
3188 		ss_flags |= kTaskAllowIdleExit;
3189 	}
3190 
3191 #endif
3192 	if (task->effective_policy.tep_tal_engaged) {
3193 		ss_flags |= kTaskTALEngaged;
3194 	}
3195 
3196 	ss_flags |= workqueue_get_task_ss_flags_from_pwq_state_kdp(bsd_info);
3197 
3198 #if IMPORTANCE_INHERITANCE
3199 	if (task->task_imp_base) {
3200 		if (task->task_imp_base->iit_donor) {
3201 			ss_flags |= kTaskIsImpDonor;
3202 		}
3203 		if (task->task_imp_base->iit_live_donor) {
3204 			ss_flags |= kTaskIsLiveImpDonor;
3205 		}
3206 	}
3207 #endif
3208 	return ss_flags;
3209 }
3210 
3211 static kern_return_t
kcdata_record_shared_cache_info(kcdata_descriptor_t kcd,task_t task,unaligned_u64 * task_snap_ss_flags)3212 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
3213 {
3214 	kern_return_t error = KERN_SUCCESS;
3215 
3216 	uint64_t shared_cache_slide = 0;
3217 	uint64_t shared_cache_first_mapping = 0;
3218 	uint32_t kdp_fault_results = 0;
3219 	uint32_t shared_cache_id = 0;
3220 	struct dyld_shared_cache_loadinfo shared_cache_data = {0};
3221 
3222 
3223 	assert(task_snap_ss_flags != NULL);
3224 
3225 	/* Get basic info about the shared region pointer, regardless of any failures */
3226 	if (task->shared_region == NULL) {
3227 		*task_snap_ss_flags |= kTaskSharedRegionNone;
3228 	} else if (task->shared_region == primary_system_shared_region) {
3229 		*task_snap_ss_flags |= kTaskSharedRegionSystem;
3230 	} else {
3231 		*task_snap_ss_flags |= kTaskSharedRegionOther;
3232 	}
3233 
3234 	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
3235 		struct vm_shared_region *sr = task->shared_region;
3236 		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
3237 
3238 		shared_cache_id = sr->sr_id;
3239 	} else {
3240 		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3241 		goto error_exit;
3242 	}
3243 
3244 	/* We haven't copied in the shared region UUID yet as part of setup */
3245 	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
3246 		goto error_exit;
3247 	}
3248 
3249 
3250 	/*
3251 	 * No refcounting here, but we are in debugger context, so that should be safe.
3252 	 */
3253 	shared_cache_slide = task->shared_region->sr_slide;
3254 
3255 	if (task->shared_region == primary_system_shared_region) {
3256 		/* skip adding shared cache info -- it's the same as the system level one */
3257 		goto error_exit;
3258 	}
3259 	/*
3260 	 * New-style shared cache reference: for non-primary shared regions,
3261 	 * just include the ID of the shared cache we're attached to.  Consumers
3262 	 * should use the following info from the task's ts_ss_flags as well:
3263 	 *
3264 	 * kTaskSharedRegionNone - task is not attached to a shared region
3265 	 * kTaskSharedRegionSystem - task is attached to the shared region
3266 	 *     with kSharedCacheSystemPrimary set in sharedCacheFlags.
3267 	 * kTaskSharedRegionOther - task is attached to the shared region with
3268 	 *     sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
3269 	 */
3270 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
3271 
3272 	/*
3273 	 * For backwards compatibility; this should eventually be removed.
3274 	 *
3275 	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3276 	 * naming of both the structure and fields for this use wasn't great.  The
3277 	 * dyld_shared_cache_loadinfo structure has better names, but the same
3278 	 * layout and content as the original.
3279 	 *
3280 	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3281 	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3282 	 * entries; here, it's the slid first mapping, and we leave it that way
3283 	 * for backwards compatibility.
3284 	 */
3285 	shared_cache_data.sharedCacheSlide = shared_cache_slide;
3286 	kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
3287 	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
3288 	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
3289 	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
3290 
3291 error_exit:
3292 	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3293 		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
3294 	}
3295 
3296 	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3297 		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3298 	}
3299 
3300 	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3301 		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3302 	}
3303 
3304 	return error;
3305 }
3306 
/*
 * Record image load information (UUIDs and load addresses) for `task`.
 *
 * kcd                - kcdata buffer receiving the records.
 * task               - task to describe.  A pid of 0 selects the kernel /
 *                      kext path; a pid greater than 0 selects the user path.
 * trace_flags        - stackshot request flags.  Consulted here:
 *                      STACKSHOT_SAVE_LOADINFO, STACKSHOT_SAVE_KEXT_LOADINFO,
 *                      STACKSHOT_SAVE_DYLD_COMPACTINFO and
 *                      STACKSHOT_ENABLE_UUID_FAULTING.
 * have_pmap          - supplied by the caller; user memory is only read, and
 *                      the kernel UUID count only computed, when it is set.
 * task_snap_ss_flags - in/out flag word (must not be NULL).  Receives
 *                      kTaskDyldCompactInfo* bits for the compact-info copy
 *                      and kTaskUUIDInfo* bits for the other user copyins.
 *
 * User tasks: the task's dyld_all_image_infos is copied in to find the UUID
 * array and (for new enough versions) the dyld compact info; those are then
 * copied straight into the kcdata buffer.  If the UUID array could not be
 * copied, the main binary's UUID from the proc is stored instead when
 * available.
 *
 * Kernel task: either a single kernelcache record (arm64, kext info not
 * requested) or an array holding the kernel's UUID followed by one entry per
 * loaded kext summary.
 *
 * Returns KERN_SUCCESS, or the first kcdata error; the fault-result flags are
 * applied to *task_snap_ss_flags on both the success and the error path.
 */
static kern_return_t
kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
{
	bool save_loadinfo_p         = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
	bool save_kextloadinfo_p     = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
	bool save_compactinfo_p      = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
	bool should_fault            = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);

	kern_return_t error        = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;

	mach_vm_address_t dyld_compactinfo_addr = 0;
	uint32_t dyld_compactinfo_size = 0;

	uint32_t uuid_info_count         = 0;
	mach_vm_address_t uuid_info_addr = 0;
	/* The timestamp is read below but never emitted; the pragma silences the unused warning. */
	uint64_t uuid_info_timestamp     = 0;
	#pragma unused(uuid_info_timestamp)
	/* Accumulates fault results of every copyin except the compact-info one. */
	kdp_fault_result_flags_t kdp_fault_results = 0;


	assert(task_snap_ss_flags != NULL);

	int task_pid     = pid_from_task(task);
	boolean_t task_64bit_addr = task_has_64Bit_addr(task);

	if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
		if (task_64bit_addr) {
			struct user64_dyld_all_image_infos task_image_infos;
			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
				uuid_info_addr = task_image_infos.uuidArray;
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
					uuid_info_timestamp = task_image_infos.timestamp;
				}
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
				}

			}
		} else {
			/* Same extraction as above, using the 32-bit layout of the structure. */
			struct user32_dyld_all_image_infos task_image_infos;
			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
				uuid_info_count = task_image_infos.uuidArrayCount;
				uuid_info_addr = task_image_infos.uuidArray;
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
					uuid_info_timestamp = task_image_infos.timestamp;
				}
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
				}
			}
		}

		/*
		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
		 * for this task.
		 */
		if (!uuid_info_addr) {
			uuid_info_count = 0;
		}

		/* Likewise, a NULL compact-info address means there is nothing to copy. */
		if (!dyld_compactinfo_addr) {
			dyld_compactinfo_size = 0;
		}

	}

	/* Kernel task: the entry count is the kernel itself plus, if requested and readable, every kext summary. */
	if (have_pmap && task_pid == 0) {
		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
		} else {
			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
		}
	}

	if (save_compactinfo_p && task_pid > 0) {
		if (dyld_compactinfo_size == 0) {
			*task_snap_ss_flags |= kTaskDyldCompactInfoNone;
		} else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
			*task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
		} else {
			/* Separate fault-result word so compact-info outcomes map to their own flags. */
			kdp_fault_result_flags_t ci_kdp_fault_results = 0;

			/* Open a compression window to avoid overflowing the stack */
			kcdata_compression_window_open(kcd);
			kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
			    dyld_compactinfo_size, &out_addr));

			/* Copy directly into the kcdata buffer; on failure leave zeroes rather than partial data. */
			if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
			    dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
				bzero((void *)out_addr, dyld_compactinfo_size);
			}
			if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
			}

			if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
			}

			if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
			}

			kcd_exit_on_error(kcdata_compression_window_close(kcd));
		}
	}
	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
		uint32_t copied_uuid_count = 0;
		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
		uint32_t uuid_info_array_size = 0;

		/* Open a compression window to avoid overflowing the stack */
		kcdata_compression_window_open(kcd);

		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
		if (uuid_info_count > 0) {
			uuid_info_array_size = uuid_info_count * uuid_info_size;

			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
			    uuid_info_size, uuid_info_count, &out_addr));

			if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
				bzero((void *)out_addr, uuid_info_array_size);
			} else {
				copied_uuid_count = uuid_info_count;
			}
		}

		uuid_t binary_uuid;
		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
			if (uuid_info_array_size == 0) {
				/* We just need to store one UUID */
				uuid_info_array_size = uuid_info_size;
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
				    uuid_info_size, 1, &out_addr));
			}

			/*
			 * out_addr is either the one-entry array just reserved, or the
			 * zeroed array from the failed copyin; only its first entry is
			 * filled in.
			 */
			if (task_64bit_addr) {
				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
				uint64_t image_load_address = task->mach_header_vm_address;

				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
			} else {
				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;

				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
			}
		}

		kcd_exit_on_error(kcdata_compression_window_close(kcd));
	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
		uintptr_t image_load_address;

		/* Single-pass loop: `break` is used to skip the remainder of the kernel path. */
		do {
#if defined(__arm64__)
			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
				struct dyld_uuid_info_64 kc_uuid = {0};
				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
				kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
				break;
			}
#endif /* defined(__arm64__) */

			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
				/* Kernel UUID not found or inaccessible */
				break;
			}

			/* Pick the record type matching the size of kernel_uuid_info. */
			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
#if  defined(__arm64__)
				kc_format_t primary_kc_type = KCFormatUnknown;
				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
				}
#endif
			}

			/*
			 * The element count of the array can vary - avoid overflowing the
			 * stack by opening a window.
			 */
			kcdata_compression_window_open(kcd);
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;

			/* Entry 0 always describes the kernel itself. */
			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
#if defined(__arm64__)
			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
				extern vm_offset_t segTEXTEXECB;
				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
			}
#endif
			uuid_info_array[0].imageLoadAddress = image_load_address;
			kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));

			/* Kext summaries are only walked if both the header and the whole summary array validate. */
			if (save_kextloadinfo_p &&
			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
				uint32_t kexti;
				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
#if defined(__arm64__)
					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
					}
#endif
					/* Kext entries follow the kernel's entry, hence the +1. */
					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
					kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
				}
			}
			kcd_exit_on_error(kcdata_compression_window_close(kcd));
		} while (0);
	}

error_exit:
	/* Reached on success and on error: translate the accumulated copyin fault results into task flags. */
	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
	}

	return error;
}
3556 
3557 static kern_return_t
kcdata_record_task_iostats(kcdata_descriptor_t kcd,task_t task)3558 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
3559 {
3560 	kern_return_t error = KERN_SUCCESS;
3561 	mach_vm_address_t out_addr = 0;
3562 
3563 	/* I/O Statistics if any counters are non zero */
3564 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3565 	if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
3566 		/* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
3567 		kcdata_compression_window_open(kcd);
3568 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3569 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3570 		_iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
3571 		_iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
3572 		_iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
3573 		_iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
3574 		_iostat->ss_paging_count = task->task_io_stats->paging.count;
3575 		_iostat->ss_paging_size = task->task_io_stats->paging.size;
3576 		_iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
3577 		_iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
3578 		_iostat->ss_metadata_count = task->task_io_stats->metadata.count;
3579 		_iostat->ss_metadata_size = task->task_io_stats->metadata.size;
3580 		_iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
3581 		_iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
3582 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
3583 			_iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
3584 			_iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
3585 		}
3586 		kcd_exit_on_error(kcdata_compression_window_close(kcd));
3587 	}
3588 
3589 
3590 error_exit:
3591 	return error;
3592 }
3593 
#if CONFIG_PERVASIVE_CPI
/*
 * Push a STACKSHOT_KCTYPE_INSTRS_CYCLES record for `task`.
 *
 * recount_task_terminated_usage_perf_only() supplies two usage totals; the
 * first feeds ics_instructions / ics_cycles and the second (the "perf only"
 * one) feeds ics_p_instructions / ics_p_cycles.
 *
 * Returns the result of kcdata_push_data().
 */
static kern_return_t
kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
{
	struct recount_usage all_usage = { 0 };
	struct recount_usage p_usage = { 0 };
	struct instrs_cycles_snapshot_v2 snap = { 0 };

	recount_task_terminated_usage_perf_only(task, &all_usage, &p_usage);

	snap.ics_instructions = recount_usage_instructions(&all_usage);
	snap.ics_cycles = recount_usage_cycles(&all_usage);

	snap.ics_p_instructions = recount_usage_instructions(&p_usage);
	snap.ics_p_cycles = recount_usage_cycles(&p_usage);

	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(snap), &snap);
}
#endif /* CONFIG_PERVASIVE_CPI */
3610 
3611 static kern_return_t
kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd,task_t task)3612 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
3613 {
3614 	struct stackshot_cpu_architecture cpu_architecture = {0};
3615 	int32_t cputype;
3616 	int32_t cpusubtype;
3617 
3618 	proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
3619 	cpu_architecture.cputype = cputype;
3620 	cpu_architecture.cpusubtype = cpusubtype;
3621 
3622 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
3623 }
3624 
3625 static kern_return_t
kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd,task_t task)3626 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
3627 {
3628 	struct stackshot_task_codesigning_info codesigning_info = {};
3629 	void * bsdtask_info = NULL;
3630 	uint32_t trust = 0;
3631 	kern_return_t ret = 0;
3632 	pmap_t pmap = get_task_pmap(task);
3633 	if (task != kernel_task) {
3634 		bsdtask_info = get_bsdtask_info(task);
3635 		codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
3636 		ret = get_trust_level_kdp(pmap, &trust);
3637 		if (ret != KERN_SUCCESS) {
3638 			trust = KCDATA_INVALID_CS_TRUST_LEVEL;
3639 		}
3640 		codesigning_info.cs_trust_level = trust;
3641 	} else {
3642 		return KERN_SUCCESS;
3643 	}
3644 	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
3645 }
3646 
3647 static kern_return_t
kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd,task_t task)3648 kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd, task_t task)
3649 {
3650 	uint64_t jit_start_addr = 0;
3651 	uint64_t jit_end_addr = 0;
3652 	struct crashinfo_jit_address_range range = {};
3653 	kern_return_t ret = 0;
3654 	pmap_t pmap = get_task_pmap(task);
3655 	if (task == kernel_task || NULL == pmap) {
3656 		return KERN_SUCCESS;
3657 	}
3658 	ret = get_jit_address_range_kdp(pmap, (uintptr_t*)&jit_start_addr, (uintptr_t*)&jit_end_addr);
3659 	if (KERN_SUCCESS == ret) {
3660 		range.start_address = jit_start_addr;
3661 		range.end_address = jit_end_addr;
3662 		return kcdata_push_data(kcd, TASK_CRASHINFO_JIT_ADDRESS_RANGE, sizeof(struct crashinfo_jit_address_range), &range);
3663 	} else {
3664 		return KERN_SUCCESS;
3665 	}
3666 }
3667 
#if CONFIG_TASK_SUSPEND_STATS
/*
 * Record suspension statistics and suspension sources for `task`.
 *
 * The kernel task is skipped (KERN_SUCCESS, nothing written).  For other
 * tasks a STACKSHOT_KCTYPE_SUSPENSION_INFO record is pushed first, followed
 * by an array of TASK_SUSPEND_SOURCES_MAX STACKSHOT_KCTYPE_SUSPENSION_SOURCE
 * entries.
 *
 * Returns KERN_SUCCESS, or the first error from the task_get_suspend_*_kdp()
 * queries or the kcdata pushes.  If the sources query fails, the info record
 * has already been pushed.
 */
static kern_return_t
kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
{
	kern_return_t ret = KERN_SUCCESS;
	struct stackshot_suspension_info suspension_info = {};
	task_suspend_stats_data_t suspend_stats;
	task_suspend_source_array_t suspend_sources;
	/*
	 * Zero the whole output array: it is pushed byte-for-byte below, and
	 * strlcpy() writes nothing past the terminating NUL, so without this the
	 * tail of every tss_procname would carry stale stack contents into the
	 * stackshot.
	 */
	struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX] = {};
	int i;

	if (task == kernel_task) {
		return KERN_SUCCESS;
	}

	ret = task_get_suspend_stats_kdp(task, &suspend_stats);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	suspension_info.tss_count = suspend_stats.tss_count;
	suspension_info.tss_duration = suspend_stats.tss_duration;
	suspension_info.tss_last_end = suspend_stats.tss_last_end;
	suspension_info.tss_last_start = suspend_stats.tss_last_start;
	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	ret = task_get_suspend_sources_kdp(task, suspend_sources);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
		suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
		/* Bound the copy by the destination buffer, not the source one. */
		strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspension_sources[i].tss_procname));
		suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
		suspension_sources[i].tss_time = suspend_sources[i].tss_time;
	}
	return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
}
#endif /* CONFIG_TASK_SUSPEND_STATS */
3711 
3712 static kern_return_t
kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd,task_t task,unaligned_u64 task_snap_ss_flags,uint64_t transition_type)3713 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
3714 {
3715 	kern_return_t error                 = KERN_SUCCESS;
3716 	mach_vm_address_t out_addr          = 0;
3717 	struct transitioning_task_snapshot * cur_tsnap = NULL;
3718 
3719 	int task_pid           = pid_from_task(task);
3720 	/* Is returning -1 ok for terminating task ok ??? */
3721 	uint64_t task_uniqueid = get_task_uniqueid(task);
3722 
3723 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3724 		/*
3725 		 * if this task is a transit task from another one, show the pid as
3726 		 * negative
3727 		 */
3728 		task_pid = 0 - task_pid;
3729 	}
3730 
3731 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3732 	kcdata_compression_window_open(kcd);
3733 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
3734 	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
3735 	bzero(cur_tsnap, sizeof(*cur_tsnap));
3736 
3737 	cur_tsnap->tts_unique_pid = task_uniqueid;
3738 	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
3739 	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
3740 	cur_tsnap->tts_transition_type = transition_type;
3741 	cur_tsnap->tts_pid = task_pid;
3742 
3743 	/* Add the BSD process identifiers */
3744 	if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
3745 		proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
3746 	} else {
3747 		cur_tsnap->tts_p_comm[0] = '\0';
3748 	}
3749 
3750 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
3751 
3752 error_exit:
3753 	return error;
3754 }
3755 
3756 static kern_return_t
3757 #if STACKSHOT_COLLECTS_LATENCY_INFO
kcdata_record_task_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags,struct stackshot_latency_task * latency_info)3758 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
3759 #else
3760 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3761 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3762 {
3763 	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3764 	bool collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
3765 #if CONFIG_PERVASIVE_CPI
3766 	bool collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3767 #endif /* CONFIG_PERVASIVE_CPI */
3768 #if __arm64__
3769 	bool collect_asid            = ((trace_flags & STACKSHOT_ASID) != 0);
3770 #endif
3771 	bool collect_pagetables      = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
3772 
3773 
3774 	kern_return_t error                 = KERN_SUCCESS;
3775 	mach_vm_address_t out_addr          = 0;
3776 	struct task_snapshot_v2 * cur_tsnap = NULL;
3777 #if STACKSHOT_COLLECTS_LATENCY_INFO
3778 	latency_info->cur_tsnap_latency = mach_absolute_time();
3779 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3780 
3781 	int task_pid           = pid_from_task(task);
3782 	uint64_t task_uniqueid = get_task_uniqueid(task);
3783 	void *bsd_info = get_bsdtask_info(task);
3784 	uint64_t proc_starttime_secs = 0;
3785 
3786 	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3787 		/*
3788 		 * if this task is a transit task from another one, show the pid as
3789 		 * negative
3790 		 */
3791 		task_pid = 0 - task_pid;
3792 	}
3793 
3794 	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3795 	kcdata_compression_window_open(kcd);
3796 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
3797 	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
3798 	bzero(cur_tsnap, sizeof(*cur_tsnap));
3799 
3800 	cur_tsnap->ts_unique_pid = task_uniqueid;
3801 	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
3802 	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
3803 
3804 	struct recount_usage term_usage = { 0 };
3805 	recount_task_terminated_usage(task, &term_usage);
3806 	struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
3807 	cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
3808 	cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
3809 
3810 	proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
3811 	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
3812 	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3813 	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
3814 	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
3815 	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
3816 
3817 	cur_tsnap->ts_suspend_count = task->suspend_count;
3818 	cur_tsnap->ts_faults = counter_load(&task->faults);
3819 	cur_tsnap->ts_pageins = counter_load(&task->pageins);
3820 	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
3821 	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
3822 	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3823 	cur_tsnap->ts_pid = task_pid;
3824 
3825 	/* Add the BSD process identifiers */
3826 	if (task_pid != -1 && bsd_info != NULL) {
3827 		proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
3828 	} else {
3829 		cur_tsnap->ts_p_comm[0] = '\0';
3830 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
3831 		if (task->task_imp_base != NULL) {
3832 			kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
3833 			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
3834 		}
3835 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
3836 	}
3837 
3838 	kcd_exit_on_error(kcdata_compression_window_close(kcd));
3839 
3840 #if CONFIG_COALITIONS
3841 	if (task_pid != -1 && bsd_info != NULL &&
3842 	    (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
3843 		/*
3844 		 * The jetsam coalition ID is always saved, even if
3845 		 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
3846 		 */
3847 		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
3848 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
3849 	}
3850 #endif /* CONFIG_COALITIONS */
3851 
3852 #if __arm64__
3853 	if (collect_asid && have_pmap) {
3854 		uint32_t asid = PMAP_VASID(task->map->pmap);
3855 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
3856 	}
3857 #endif
3858 
3859 #if STACKSHOT_COLLECTS_LATENCY_INFO
3860 	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
3861 	latency_info->pmap_latency = mach_absolute_time();
3862 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3863 
3864 	if (collect_pagetables && have_pmap) {
3865 #if SCHED_HYGIENE_DEBUG
3866 		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
3867 		ml_spin_debug_clear_self();
3868 #endif
3869 		assert(stackshot_ctx.sc_is_singlethreaded);
3870 		size_t bytes_dumped = 0;
3871 		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stackshot_args.pagetable_mask, &bytes_dumped);
3872 		if (error != KERN_SUCCESS) {
3873 			goto error_exit;
3874 		} else {
3875 			/* Variable size array - better not have it on the stack. */
3876 			kcdata_compression_window_open(kcd);
3877 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
3878 			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
3879 			kcd_exit_on_error(kcdata_compression_window_close(kcd));
3880 		}
3881 	}
3882 
3883 #if STACKSHOT_COLLECTS_LATENCY_INFO
3884 	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
3885 	latency_info->bsd_proc_ids_latency = mach_absolute_time();
3886 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3887 
3888 #if STACKSHOT_COLLECTS_LATENCY_INFO
3889 	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
3890 	latency_info->end_latency = mach_absolute_time();
3891 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3892 
3893 	if (collect_iostats) {
3894 		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
3895 	}
3896 
3897 #if CONFIG_PERVASIVE_CPI
3898 	if (collect_instrs_cycles) {
3899 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
3900 	}
3901 #endif /* CONFIG_PERVASIVE_CPI */
3902 
3903 	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
3904 	kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
3905 	kcd_exit_on_error(kcdata_record_task_jit_address_range(kcd, task));
3906 
3907 #if CONFIG_TASK_SUSPEND_STATS
3908 	kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
3909 #endif /* CONFIG_TASK_SUSPEND_STATS */
3910 
3911 #if STACKSHOT_COLLECTS_LATENCY_INFO
3912 	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
3913 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3914 
3915 error_exit:
3916 	return error;
3917 }
3918 
3919 static kern_return_t
kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags)3920 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3921 {
3922 #if !CONFIG_PERVASIVE_CPI
3923 #pragma unused(trace_flags)
3924 #endif /* !CONFIG_PERVASIVE_CPI */
3925 	kern_return_t error                       = KERN_SUCCESS;
3926 	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
3927 	mach_vm_address_t out_addr                = 0;
3928 	(void) trace_flags;
3929 #if __arm64__
3930 	boolean_t collect_asid                    = ((trace_flags & STACKSHOT_ASID) != 0);
3931 #endif
3932 #if CONFIG_PERVASIVE_CPI
3933 	boolean_t collect_instrs_cycles           = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3934 #endif /* CONFIG_PERVASIVE_CPI */
3935 
3936 	uint64_t task_uniqueid = get_task_uniqueid(task);
3937 
3938 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
3939 
3940 	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
3941 
3942 	cur_tsnap->tds_unique_pid = task_uniqueid;
3943 	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
3944 	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
3945 
3946 	struct recount_usage usage = { 0 };
3947 	recount_task_terminated_usage(task, &usage);
3948 	struct recount_times_mach term_times = recount_usage_times_mach(&usage);
3949 
3950 	cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
3951 	cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
3952 
3953 	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3954 
3955 	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
3956 	cur_tsnap->tds_suspend_count = task->suspend_count;
3957 	cur_tsnap->tds_faults            = counter_load(&task->faults);
3958 	cur_tsnap->tds_pageins           = counter_load(&task->pageins);
3959 	cur_tsnap->tds_cow_faults        = counter_load(&task->cow_faults);
3960 	cur_tsnap->tds_was_throttled     = (uint32_t)proc_was_throttled_from_task(task);
3961 	cur_tsnap->tds_did_throttle      = (uint32_t)proc_did_throttle_from_task(task);
3962 	cur_tsnap->tds_latency_qos       = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
3963 	    ? LATENCY_QOS_TIER_UNSPECIFIED
3964 	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3965 
3966 #if __arm64__
3967 	if (collect_asid && have_pmap) {
3968 		uint32_t asid = PMAP_VASID(task->map->pmap);
3969 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
3970 		kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
3971 	}
3972 #endif
3973 
3974 #if CONFIG_PERVASIVE_CPI
3975 	if (collect_instrs_cycles) {
3976 		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
3977 	}
3978 #endif /* CONFIG_PERVASIVE_CPI */
3979 
3980 error_exit:
3981 	return error;
3982 }
3983 
3984 static kern_return_t
kcdata_record_thread_iostats(kcdata_descriptor_t kcd,thread_t thread)3985 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
3986 {
3987 	kern_return_t error = KERN_SUCCESS;
3988 	mach_vm_address_t out_addr = 0;
3989 
3990 	/* I/O Statistics */
3991 	assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3992 	if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
3993 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3994 		struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3995 		_iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
3996 		_iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
3997 		_iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
3998 		_iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
3999 		_iostat->ss_paging_count = thread->thread_io_stats->paging.count;
4000 		_iostat->ss_paging_size = thread->thread_io_stats->paging.size;
4001 		_iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
4002 		_iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
4003 		_iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
4004 		_iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
4005 		_iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
4006 		_iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
4007 		for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
4008 			_iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
4009 			_iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
4010 		}
4011 	}
4012 
4013 error_exit:
4014 	return error;
4015 }
4016 
4017 bool
machine_trace_thread_validate_kva(vm_offset_t addr)4018 machine_trace_thread_validate_kva(vm_offset_t addr)
4019 {
4020 	return _stackshot_validate_kva(addr, sizeof(uintptr_t));
4021 }
4022 
4023 struct _stackshot_backtrace_context {
4024 	vm_map_t sbc_map;
4025 	vm_offset_t sbc_prev_page;
4026 	vm_offset_t sbc_prev_kva;
4027 	uint32_t sbc_flags;
4028 	bool sbc_allow_faulting;
4029 };
4030 
4031 static errno_t
_stackshot_backtrace_copy(void * vctx,void * dst,user_addr_t src,size_t size)4032 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
4033 {
4034 	struct _stackshot_backtrace_context *ctx = vctx;
4035 	size_t map_page_mask = 0;
4036 	size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
4037 	    &map_page_mask);
4038 	assert(size < map_page_size);
4039 	if (src & (size - 1)) {
4040 		// The source should be aligned to the size passed in, like a stack
4041 		// frame or word.
4042 		return EINVAL;
4043 	}
4044 
4045 	vm_offset_t src_page = src & ~map_page_mask;
4046 	vm_offset_t src_kva = 0;
4047 
4048 	if (src_page != ctx->sbc_prev_page) {
4049 		uint32_t res = 0;
4050 		uint32_t flags = 0;
4051 		vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
4052 		    ctx->sbc_allow_faulting, &res);
4053 
4054 		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
4055 		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
4056 		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
4057 		ctx->sbc_flags |= flags;
4058 		if (src_pa == 0) {
4059 			return EFAULT;
4060 		}
4061 
4062 		src_kva = phystokv(src_pa);
4063 		ctx->sbc_prev_page = src_page;
4064 		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
4065 	} else {
4066 		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
4067 	}
4068 
4069 #if KASAN
4070 	/*
4071 	 * KASan does not monitor accesses to userspace pages. Therefore, it is
4072 	 * pointless to maintain a shadow map for them. Instead, they are all
4073 	 * mapped to a single, always valid shadow map page. This approach saves
4074 	 * a considerable amount of shadow map pages which are limited and
4075 	 * precious.
4076 	 */
4077 	kasan_notify_address_nopoison(src_kva, size);
4078 #endif
4079 	memcpy(dst, (const void *)src_kva, size);
4080 
4081 	return 0;
4082 }
4083 
/*
 * kcdata_record_thread_snapshot
 *
 * Record a full snapshot of `thread` (belonging to `task`) into `kcd`.
 *
 * A STACKSHOT_KCTYPE_THREAD_SNAPSHOT item (struct thread_snapshot_v4) is
 * reserved first and filled in as the function progresses.  Depending on
 * `trace_flags`, the build configuration and the thread's state, further
 * items are appended in this order: dispatch queue label, thread name, CPU
 * times, user stack (plus async stack on LP64), exclaves thread info, kernel
 * stack, thread group, I/O stats, instruction/cycle counts and per-thread
 * latency info.
 *
 *   kcd             kcdata descriptor receiving the items.
 *   thread          thread to describe.
 *   task            owning task; its address space is read for the dispatch
 *                   queue data and the user backtrace.
 *   trace_flags     stackshot flags.  Consulted here: STACKSHOT_GET_DQ,
 *                   STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY,
 *                   STACKSHOT_COLLECT_DELTA_SNAPSHOT, STACKSHOT_NO_IO_STATS,
 *                   STACKSHOT_INSTRS_CYCLES and STACKSHOT_THREAD_GROUP.
 *   have_pmap       gates the dispatch queue lookup.
 *   thread_on_core  when set, kThreadOnCore is reported in ths_ss_flags.
 *
 * Returns the value left in `error`: KERN_SUCCESS unless a
 * kcd_exit_on_error() step bailed out.
 * NOTE(review): kcd_exit_on_error() is defined outside this chunk; it
 * appears to assign `error` and jump to `error_exit` on failure.
 */
4084 static kern_return_t
kcdata_record_thread_snapshot(kcdata_descriptor_t kcd,thread_t thread,task_t task,uint64_t trace_flags,boolean_t have_pmap,boolean_t thread_on_core)4085 kcdata_record_thread_snapshot(kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
4086 {
4087 	boolean_t dispatch_p              = ((trace_flags & STACKSHOT_GET_DQ) != 0);
4088 	boolean_t active_kthreads_only_p  = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4089 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	/* I/O stats are skipped for delta stackshots and when explicitly disabled. */
4090 	boolean_t collect_iostats         = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
4091 #if CONFIG_PERVASIVE_CPI
4092 	boolean_t collect_instrs_cycles   = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4093 #endif /* CONFIG_PERVASIVE_CPI */
4094 	kern_return_t error        = KERN_SUCCESS;
4095 
4096 #if STACKSHOT_COLLECTS_LATENCY_INFO
	/*
	 * Each latency field is first set to a start timestamp and later
	 * overwritten with (end - start) once its phase completes.
	 */
4097 	struct stackshot_latency_thread latency_info;
4098 	latency_info.cur_thsnap1_latency = mach_absolute_time();
4099 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4100 
4101 	mach_vm_address_t out_addr = 0;
4102 	int saved_count            = 0;
4103 
4104 	struct thread_snapshot_v4 * cur_thread_snap = NULL;
4105 	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
4106 
4107 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
4108 	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
4109 
4110 	/* Populate the thread snapshot header */
4111 	cur_thread_snap->ths_ss_flags = 0;
4112 	cur_thread_snap->ths_thread_id = thread_tid(thread);
4113 	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
4114 	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
4115 	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
4116 
4117 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4118 		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4119 	} else {
4120 		cur_thread_snap->ths_voucher_identifier = 0;
4121 	}
4122 
4123 #if STACKSHOT_COLLECTS_LATENCY_INFO
4124 	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
4125 	latency_info.dispatch_serial_latency = mach_absolute_time();
4126 	latency_info.dispatch_label_latency = 0;
4127 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4128 
	/*
	 * Dispatch queue serial number and label.  These are copied in from the
	 * task's address space, so the lookup is only attempted for an active,
	 * non-kernel task when the caller reports a pmap.  Any failed copyin
	 * simply leaves the corresponding piece unrecorded.
	 */
4129 	cur_thread_snap->ths_dqserialnum = 0;
4130 	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
4131 		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
4132 		if (dqkeyaddr != 0) {
4133 			uint64_t dqaddr = 0;
			/* dqkeyaddr holds a pointer to the queue; read that pointer first. */
4134 			boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
4135 			if (copyin_ok && dqaddr != 0) {
4136 				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
4137 				uint64_t dqserialnum = 0;
4138 				copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
4139 				if (copyin_ok) {
4140 					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
4141 					cur_thread_snap->ths_dqserialnum = dqserialnum;
4142 				}
4143 
4144 #if STACKSHOT_COLLECTS_LATENCY_INFO
4145 				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
4146 				latency_info.dispatch_label_latency = mach_absolute_time();
4147 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4148 
4149 				/* try copying in the queue label */
4150 				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
4151 				if (label_offs) {
4152 					uint64_t dqlabeladdr = dqaddr + label_offs;
4153 					uint64_t actual_dqlabeladdr = 0;
4154 
					/* The queue stores a pointer to its label string, not the string itself. */
4155 					copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
4156 					if (copyin_ok && actual_dqlabeladdr != 0) {
4157 						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
4158 						int len;
4159 
4160 						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
4161 						len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
4162 						if (len > 0) {
							/* The label item is sized by the copied length, not the full buffer. */
4163 							mach_vm_address_t label_addr = 0;
4164 							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
4165 							kdp_strlcpy((char*)label_addr, &label_buf[0], len);
4166 						}
4167 					}
4168 				}
4169 #if STACKSHOT_COLLECTS_LATENCY_INFO
4170 				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
4171 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4172 			}
4173 		}
4174 	}
4175 
4176 #if STACKSHOT_COLLECTS_LATENCY_INFO
	/*
	 * If no serial number was recorded the field still holds a start
	 * timestamp rather than a duration, so report 0 instead.
	 */
4177 	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
4178 		latency_info.dispatch_serial_latency = 0;
4179 	}
4180 	latency_info.cur_thsnap2_latency = mach_absolute_time();
4181 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4182 
4183 	struct recount_times_mach times = recount_thread_times(thread);
4184 	cur_thread_snap->ths_user_time = times.rtm_user;
4185 	cur_thread_snap->ths_sys_time = times.rtm_system;
4186 
	/* Translate thread state into ths_ss_flags bits. */
4187 	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
4188 		cur_thread_snap->ths_ss_flags |= kThreadMain;
4189 	}
4190 	if (thread->effective_policy.thep_darwinbg) {
4191 		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
4192 	}
4193 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4194 		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
4195 	}
4196 	if (thread->suspend_count > 0) {
4197 		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
4198 	}
4199 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4200 		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
4201 	}
4202 #if CONFIG_EXCLAVES
4203 	/* save exclave thread for later collection */
4204 	if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !stackshot_ctx.sc_panic_stackshot) {
4205 		/* certain threads, like the collector, must never be inspected */
4206 		if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
			/*
			 * Claim a slot by bumping the shared count.  If the claimed index
			 * is past the capacity, the ctid is dropped and the count is
			 * reset to the capacity.
			 */
4207 			uint32_t ctid_index = os_atomic_inc_orig(&stackshot_exclave_inspect_ctid_count, acq_rel);
4208 			if (ctid_index < stackshot_exclave_inspect_ctid_capacity) {
4209 				stackshot_exclave_inspect_ctids[ctid_index] = thread_get_ctid(thread);
4210 			} else {
4211 				os_atomic_store(&stackshot_exclave_inspect_ctid_count, stackshot_exclave_inspect_ctid_capacity, release);
4212 			}
4213 			if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
4214 				panic("stackshot: trying to inspect already-queued thread");
4215 			}
4216 		}
4217 	}
4218 #endif /* CONFIG_EXCLAVES */
4219 	if (thread_on_core) {
4220 		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
4221 	}
4222 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4223 		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
4224 	}
4225 
4226 	/* make sure state flags defined in kcdata.h still match internal flags */
4227 	static_assert(SS_TH_WAIT == TH_WAIT);
4228 	static_assert(SS_TH_SUSP == TH_SUSP);
4229 	static_assert(SS_TH_RUN == TH_RUN);
4230 	static_assert(SS_TH_UNINT == TH_UNINT);
4231 	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
4232 	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
4233 	static_assert(SS_TH_IDLE == TH_IDLE);
4234 
	/* Scheduling state, priorities and QoS. */
4235 	cur_thread_snap->ths_last_run_time           = thread->last_run_time;
4236 	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
4237 	cur_thread_snap->ths_state                   = thread->state;
4238 	cur_thread_snap->ths_sched_flags             = thread->sched_flags;
4239 	cur_thread_snap->ths_base_priority = thread->base_pri;
4240 	cur_thread_snap->ths_sched_priority = thread->sched_pri;
4241 	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
4242 	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
	/* Report the larger of the two requested QoS override values. */
4243 	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4244 	    thread->requested_policy.thrp_qos_workq_override);
4245 	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4246 	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
4247 
	/*
	 * The policy structures are copied out as raw 64-bit words; the
	 * static_asserts guarantee each one is exactly that size.
	 */
4248 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4249 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4250 	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4251 	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4252 
4253 #if STACKSHOT_COLLECTS_LATENCY_INFO
4254 	latency_info.cur_thsnap2_latency = mach_absolute_time()  - latency_info.cur_thsnap2_latency;
4255 	latency_info.thread_name_latency = mach_absolute_time();
4256 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4257 
4258 	/* if there is thread name then add to buffer */
4259 	cur_thread_name[0] = '\0';
4260 	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
4261 	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
		/* The name item is fixed-size: the whole local buffer is copied. */
4262 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
4263 		kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
4264 	}
4265 
4266 #if STACKSHOT_COLLECTS_LATENCY_INFO
4267 	latency_info.thread_name_latency = mach_absolute_time()  - latency_info.thread_name_latency;
4268 	latency_info.sur_times_latency = mach_absolute_time();
4269 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4270 
4271 	/* record system, user, and runnable times */
4272 	time_value_t runnable_time;
4273 	thread_read_times(thread, NULL, NULL, &runnable_time);
4274 	clock_sec_t user_sec = 0, system_sec = 0;
4275 	clock_usec_t user_usec = 0, system_usec = 0;
	/* User/system times reuse the `times` sampled above, converted to sec + usec. */
4276 	absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
4277 	absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
4278 
4279 	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
4280 	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
4281 	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
4282 		.user_usec = user_sec * USEC_PER_SEC + user_usec,
4283 		.system_usec = system_sec * USEC_PER_SEC + system_usec,
4284 		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
4285 	};
4286 
4287 #if STACKSHOT_COLLECTS_LATENCY_INFO
4288 	latency_info.sur_times_latency = mach_absolute_time()  - latency_info.sur_times_latency;
4289 	latency_info.user_stack_latency = mach_absolute_time();
4290 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4291 
4292 	/* Trace user stack, if any */
4293 	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
4294 		uint32_t user_ths_ss_flags = 0;
4295 
4296 		/*
4297 		 * We don't know how big the stacktrace will be, so read it into our
4298 		 * per-cpu buffer, then copy it to the kcdata.
4299 		 */
		/*
		 * sbc_prev_page/sbc_prev_kva start at -1; _stackshot_backtrace_copy()
		 * compares sbc_prev_page against the page of each address it reads.
		 */
4300 		struct _stackshot_backtrace_context ctx = {
4301 			.sbc_map = task->map,
4302 			.sbc_allow_faulting = stackshot_ctx.sc_enable_faulting,
4303 			.sbc_prev_page = -1,
4304 			.sbc_prev_kva = -1,
4305 		};
4306 		struct backtrace_control ctl = {
4307 			.btc_user_thread = thread,
4308 			.btc_user_copy = _stackshot_backtrace_copy,
4309 			.btc_user_copy_context = &ctx,
4310 		};
4311 		struct backtrace_user_info info = BTUINFO_INIT;
4312 
4313 		saved_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4314 		    &info);
4315 		if (saved_count > 0) {
4316 #if __LP64__
4317 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
4318 #else // __LP64__
4319 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
4320 #endif // !__LP64__
4321 			/* Now, copy the stacktrace into kcdata. */
4322 			kcd_exit_on_error(kcdata_push_array(kcd, STACKLR_WORDS, sizeof(uintptr_t),
4323 			    saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4324 			if (info.btui_info & BTI_64_BIT) {
4325 				user_ths_ss_flags |= kUser64_p;
4326 			}
			/* Truncation is reported by the backtracer or by the copy callback. */
4327 			if ((info.btui_info & BTI_TRUNCATED) ||
4328 			    (ctx.sbc_flags & kThreadTruncatedBT)) {
4329 				user_ths_ss_flags |= kThreadTruncatedBT;
4330 				user_ths_ss_flags |= kThreadTruncUserBT;
4331 			}
4332 			user_ths_ss_flags |= ctx.sbc_flags;
			/* Reset so the async walk's truncation check below only sees flags it raised. */
4333 			ctx.sbc_flags = 0;
4334 #if __LP64__
4335 			/* We only support async stacks on 64-bit kernels */
4336 			if (info.btui_async_frame_addr != 0) {
4337 				uint32_t async_start_offset = info.btui_async_start_index;
4338 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
4339 				    sizeof(async_start_offset), &async_start_offset));
				/*
				 * Walk again with the same control block and stack buffer (its
				 * previous contents were already pushed), now starting from the
				 * async frame address.
				 */
4340 				ctl.btc_frame_addr = info.btui_async_frame_addr;
4341 				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
4342 				info = BTUINFO_INIT;
4343 				unsigned int async_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4344 				    &info);
4345 				if (async_count > 0) {
4346 					kcd_exit_on_error(kcdata_push_array(kcd, STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64,
4347 					    sizeof(uintptr_t), async_count, stackshot_cpu_ctx.scc_stack_buffer));
4348 					if ((info.btui_info & BTI_TRUNCATED) ||
4349 					    (ctx.sbc_flags & kThreadTruncatedBT)) {
4350 						user_ths_ss_flags |= kThreadTruncatedBT;
4351 						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
4352 					}
4353 					user_ths_ss_flags |= ctx.sbc_flags;
4354 				}
4355 			}
4356 #endif /* __LP64__ */
4357 		}
4358 		if (user_ths_ss_flags != 0) {
4359 			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
4360 		}
4361 	}
4362 
4363 #if STACKSHOT_COLLECTS_LATENCY_INFO
4364 	latency_info.user_stack_latency = mach_absolute_time()  - latency_info.user_stack_latency;
4365 	latency_info.kernel_stack_latency = mach_absolute_time();
4366 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4367 
4368 	/* Call through to the machine specific trace routines
4369 	 * Frames are added past the snapshot header.
4370 	 */
4371 	if (thread->kernel_stack != 0) {
4372 		uint32_t kern_ths_ss_flags = 0;
		/*
		 * The #if below selects the kcdata type, the extern declaration and
		 * the callee; the argument list after the #endif is shared.
		 */
4373 #if defined(__LP64__)
4374 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
4375 		extern int machine_trace_thread64(thread_t thread, char *tracepos,
4376 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4377 		saved_count = machine_trace_thread64(
4378 #else
4379 		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
4380 		extern int machine_trace_thread(thread_t thread, char *tracepos,
4381 		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
4382 		saved_count = machine_trace_thread(
4383 #endif
4384 			thread, (char*) stackshot_cpu_ctx.scc_stack_buffer,
4385 			(char *) (stackshot_cpu_ctx.scc_stack_buffer + MAX_FRAMES), MAX_FRAMES,
4386 			&kern_ths_ss_flags);
4387 		if (saved_count > 0) {
			/*
			 * NOTE(review): saved_count is divided by frame_size before being
			 * used as an element count below, so the machine trace routine
			 * presumably returns a byte count here (unlike backtrace_user()
			 * above, whose result is used directly as a count) - confirm.
			 */
4388 			int frame_size = sizeof(uintptr_t);
4389 #if defined(__LP64__)
4390 			cur_thread_snap->ths_ss_flags |= kKernel64_p;
4391 #endif
4392 #if CONFIG_EXCLAVES
4393 			if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
4394 				struct thread_exclaves_info info = { 0 };
4395 
4396 				info.tei_flags = kExclaveRPCActive;
4397 				if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
4398 					info.tei_flags |= kExclaveSchedulerRequest;
4399 				}
4400 				if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
4401 					info.tei_flags |= kExclaveUpcallActive;
4402 				}
4403 				info.tei_scid = thread->th_exclaves_ipc_ctx.scid;
4404 				info.tei_thread_offset = exclaves_stack_offset(stackshot_cpu_ctx.scc_stack_buffer, saved_count / frame_size, false);
4405 
4406 				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
4407 			}
4408 #endif /* CONFIG_EXCLAVES */
4409 			kcd_exit_on_error(kcdata_push_array(kcd, stack_kcdata_type,
4410 			    frame_size, saved_count / frame_size, stackshot_cpu_ctx.scc_stack_buffer));
4411 		}
		/* Trace flags are merged into the header even when no frames were saved. */
4412 		if (kern_ths_ss_flags & kThreadTruncatedBT) {
4413 			kern_ths_ss_flags |= kThreadTruncKernBT;
4414 		}
4415 		if (kern_ths_ss_flags != 0) {
4416 			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
4417 		}
4418 	}
4419 
4420 #if STACKSHOT_COLLECTS_LATENCY_INFO
4421 	latency_info.kernel_stack_latency = mach_absolute_time()  - latency_info.kernel_stack_latency;
4422 	latency_info.misc_latency = mach_absolute_time();
4423 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4424 
4425 #if CONFIG_THREAD_GROUPS
4426 	if (trace_flags & STACKSHOT_THREAD_GROUP) {
		/* A thread without a thread group is recorded with id 0. */
4427 		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
4428 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
4429 		kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
4430 	}
4431 #endif /* CONFIG_THREAD_GROUPS */
4432 
4433 	if (collect_iostats) {
4434 		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
4435 	}
4436 
4437 #if CONFIG_PERVASIVE_CPI
4438 	if (collect_instrs_cycles) {
		/* Sum the thread's lifetime recount tracks into a single usage structure. */
4439 		struct recount_usage usage = { 0 };
4440 		recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
4441 		    &usage);
4442 
4443 		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
4444 		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
4445 		    instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
4446 		    instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
4447 	}
4448 #endif /* CONFIG_PERVASIVE_CPI */
4449 
4450 #if STACKSHOT_COLLECTS_LATENCY_INFO
4451 	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
	/* The per-thread latency record is only emitted when collect_latency_info is set. */
4452 	if (collect_latency_info) {
4453 		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
4454 	}
4455 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4456 
4457 error_exit:
4458 	return error;
4459 }
4460 
4461 static int
kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap,thread_t thread,boolean_t thread_on_core)4462 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
4463 {
4464 	cur_thread_snap->tds_thread_id = thread_tid(thread);
4465 	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4466 		cur_thread_snap->tds_voucher_identifier  = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4467 	} else {
4468 		cur_thread_snap->tds_voucher_identifier = 0;
4469 	}
4470 
4471 	cur_thread_snap->tds_ss_flags = 0;
4472 	if (thread->effective_policy.thep_darwinbg) {
4473 		cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
4474 	}
4475 	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4476 		cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
4477 	}
4478 	if (thread->suspend_count > 0) {
4479 		cur_thread_snap->tds_ss_flags |= kThreadSuspended;
4480 	}
4481 	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4482 		cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
4483 	}
4484 	if (thread_on_core) {
4485 		cur_thread_snap->tds_ss_flags |= kThreadOnCore;
4486 	}
4487 	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4488 		cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
4489 	}
4490 
4491 	cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
4492 	cur_thread_snap->tds_state                   = thread->state;
4493 	cur_thread_snap->tds_sched_flags             = thread->sched_flags;
4494 	cur_thread_snap->tds_base_priority           = thread->base_pri;
4495 	cur_thread_snap->tds_sched_priority          = thread->sched_pri;
4496 	cur_thread_snap->tds_eqos                    = thread->effective_policy.thep_qos;
4497 	cur_thread_snap->tds_rqos                    = thread->requested_policy.thrp_qos;
4498 	cur_thread_snap->tds_rqos_override           = MAX(thread->requested_policy.thrp_qos_override,
4499 	    thread->requested_policy.thrp_qos_workq_override);
4500 	cur_thread_snap->tds_io_tier                 = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4501 
4502 	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4503 	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4504 	cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4505 	cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4506 
4507 	return 0;
4508 }
4509 
/*
 * Capacity of the ids[] array in struct saved_uniqueids.
 *
 * Why 12?  12 strikes a decent balance between allocating a large array on
 * the stack and having large kcdata item overheads for recording nonrunnable
 * tasks.
 */
#define UNIQUEIDSPERFLUSH 12
4516 
4517 struct saved_uniqueids {
4518 	uint64_t ids[UNIQUEIDSPERFLUSH];
4519 	unsigned count;
4520 };
4521 
/* Verdict returned by classify_thread(): which kind of snapshot a thread gets. */
enum thread_classification {
	/* take a full snapshot */
	tc_full_snapshot,
	/* take a delta snapshot */
	tc_delta_snapshot,
};
4526 
4527 static enum thread_classification
classify_thread(thread_t thread,boolean_t * thread_on_core_p,boolean_t collect_delta_stackshot)4528 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
4529 {
4530 	processor_t last_processor = thread->last_processor;
4531 
4532 	boolean_t thread_on_core = FALSE;
4533 	if (last_processor != PROCESSOR_NULL) {
4534 		/* Idle threads are always treated as on-core, since the processor state can change while they are running. */
4535 		thread_on_core = (thread == last_processor->idle_thread) ||
4536 		    (last_processor->state == PROCESSOR_RUNNING &&
4537 		    last_processor->active_thread == thread);
4538 	}
4539 
4540 	*thread_on_core_p = thread_on_core;
4541 
4542 	/* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
4543 	 * previous full stackshot */
4544 	if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stackshot_args.since_timestamp)) {
4545 		return tc_full_snapshot;
4546 	} else {
4547 		return tc_delta_snapshot;
4548 	}
4549 }
4550 
4551 
4552 static kern_return_t
kdp_stackshot_record_task(task_t task)4553 kdp_stackshot_record_task(task_t task)
4554 {
4555 	boolean_t active_kthreads_only_p  = ((stackshot_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4556 	boolean_t save_donating_pids_p    = ((stackshot_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
4557 	boolean_t collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4558 	boolean_t save_owner_info         = ((stackshot_flags & STACKSHOT_THREAD_WAITINFO) != 0);
4559 	boolean_t include_drivers         = ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
4560 
4561 	kern_return_t error = KERN_SUCCESS;
4562 	mach_vm_address_t out_addr = 0;
4563 	int saved_count = 0;
4564 
4565 	int task_pid                   = 0;
4566 	uint64_t task_uniqueid         = 0;
4567 	int num_delta_thread_snapshots = 0;
4568 	int num_waitinfo_threads       = 0;
4569 	int num_turnstileinfo_threads  = 0;
4570 
4571 	uint64_t task_start_abstime    = 0;
4572 	boolean_t have_map = FALSE, have_pmap = FALSE;
4573 	boolean_t some_thread_ran = FALSE;
4574 	unaligned_u64 task_snap_ss_flags = 0;
4575 #if STACKSHOT_COLLECTS_LATENCY_INFO
4576 	struct stackshot_latency_task latency_info;
4577 	latency_info.setup_latency = mach_absolute_time();
4578 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4579 
4580 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4581 	uint64_t task_begin_cpu_cycle_count = 0;
4582 	if (!stackshot_ctx.sc_panic_stackshot) {
4583 		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
4584 	}
4585 #endif
4586 
4587 	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
4588 		error = KERN_FAILURE;
4589 		goto error_exit;
4590 	}
4591 
4592 	void *bsd_info = get_bsdtask_info(task);
4593 	boolean_t task_in_teardown        = (bsd_info == NULL) || proc_in_teardown(bsd_info);// has P_LPEXIT set during proc_exit()
4594 	boolean_t task_in_transition      = task_in_teardown;         // here we can add other types of transition.
4595 	uint32_t  container_type          = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
4596 	uint32_t  transition_type         = (task_in_teardown) ? kTaskIsTerminated : 0;
4597 
4598 	if (task_in_transition) {
4599 		collect_delta_stackshot = FALSE;
4600 	}
4601 
4602 	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
4603 	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
4604 
4605 	task_pid = pid_from_task(task);
4606 	/* Is returning -1 ok for terminating task ok ??? */
4607 	task_uniqueid = get_task_uniqueid(task);
4608 
4609 	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
4610 		/*
4611 		 * Not interested in terminated tasks without threads.
4612 		 */
4613 		if (queue_empty(&task->threads) || task_pid == -1) {
4614 			return KERN_SUCCESS;
4615 		}
4616 	}
4617 
4618 	/* All PIDs should have the MSB unset */
4619 	assert((task_pid & (1ULL << 31)) == 0);
4620 
4621 #if STACKSHOT_COLLECTS_LATENCY_INFO
4622 	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
4623 	latency_info.task_uniqueid = task_uniqueid;
4624 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4625 
4626 	/* Trace everything, unless a process was specified. Add in driver tasks if requested. */
4627 	if ((stackshot_args.pid == -1) || (stackshot_args.pid == task_pid) || (include_drivers && task_is_driver(task))) {
4628 #if STACKSHOT_COLLECTS_LATENCY_INFO
4629 		stackshot_cpu_latency.tasks_processed++;
4630 #endif
4631 
4632 		/* add task snapshot marker */
4633 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4634 		    container_type, task_uniqueid));
4635 
4636 		if (collect_delta_stackshot) {
4637 			/*
4638 			 * For delta stackshots we need to know if a thread from this task has run since the
4639 			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
4640 			 */
4641 			thread_t thread = THREAD_NULL;
4642 			queue_iterate(&task->threads, thread, thread_t, task_threads)
4643 			{
4644 				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4645 					error = KERN_FAILURE;
4646 					goto error_exit;
4647 				}
4648 
4649 				if (active_kthreads_only_p && thread->kernel_stack == 0) {
4650 					continue;
4651 				}
4652 
4653 				boolean_t thread_on_core;
4654 				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4655 
4656 				switch (thread_classification) {
4657 				case tc_full_snapshot:
4658 					some_thread_ran = TRUE;
4659 					break;
4660 				case tc_delta_snapshot:
4661 					num_delta_thread_snapshots++;
4662 					break;
4663 				}
4664 			}
4665 		}
4666 
4667 		if (collect_delta_stackshot) {
4668 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
4669 		}
4670 
4671 		/* Next record any relevant UUID info and store the task snapshot */
4672 		if (task_in_transition ||
4673 		    !collect_delta_stackshot ||
4674 		    (task_start_abstime == 0) ||
4675 		    (task_start_abstime > stackshot_args.since_timestamp) ||
4676 		    some_thread_ran) {
4677 			/*
4678 			 * Collect full task information in these scenarios:
4679 			 *
4680 			 * 1) a full stackshot or the task is in transition
4681 			 * 2) a delta stackshot where the task started after the previous full stackshot
4682 			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
4683 			 *
4684 			 * because the task may have exec'ed, changing its name, architecture, load info, etc
4685 			 */
4686 
4687 			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
4688 			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, stackshot_flags, have_pmap, &task_snap_ss_flags));
4689 #if STACKSHOT_COLLECTS_LATENCY_INFO
4690 			if (!task_in_transition) {
4691 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags, &latency_info));
4692 			} else {
4693 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4694 			}
4695 #else
4696 			if (!task_in_transition) {
4697 				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4698 			} else {
4699 				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4700 			}
4701 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4702 		} else {
4703 			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4704 		}
4705 
4706 #if STACKSHOT_COLLECTS_LATENCY_INFO
4707 		latency_info.misc_latency = mach_absolute_time();
4708 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4709 
4710 		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
4711 		int current_delta_snapshot_index                  = 0;
4712 		if (num_delta_thread_snapshots > 0) {
4713 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
4714 			    sizeof(struct thread_delta_snapshot_v3),
4715 			    num_delta_thread_snapshots, &out_addr));
4716 			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
4717 		}
4718 
4719 
4720 #if STACKSHOT_COLLECTS_LATENCY_INFO
4721 		latency_info.task_thread_count_loop_latency = mach_absolute_time();
4722 #endif
4723 		/*
4724 		 * Iterate over the task threads to save thread snapshots and determine
4725 		 * how much space we need for waitinfo and turnstile info
4726 		 */
4727 		thread_t thread = THREAD_NULL;
4728 		queue_iterate(&task->threads, thread, thread_t, task_threads)
4729 		{
4730 			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4731 				error = KERN_FAILURE;
4732 				goto error_exit;
4733 			}
4734 
4735 			uint64_t thread_uniqueid;
4736 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
4737 				continue;
4738 			}
4739 			thread_uniqueid = thread_tid(thread);
4740 
4741 			boolean_t thread_on_core;
4742 			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4743 
4744 #if STACKSHOT_COLLECTS_LATENCY_INFO
4745 			stackshot_cpu_latency.threads_processed++;
4746 #endif
4747 
4748 			switch (thread_classification) {
4749 			case tc_full_snapshot:
4750 				/* add thread marker */
4751 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4752 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4753 
4754 				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
4755 				kcdata_compression_window_open(stackshot_kcdata_p);
4756 
4757 				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, stackshot_flags, have_pmap, thread_on_core));
4758 
4759 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4760 
4761 				/* mark end of thread snapshot data */
4762 				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4763 				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4764 				break;
4765 			case tc_delta_snapshot:
4766 				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
4767 				break;
4768 			}
4769 
4770 			/*
4771 			 * We want to report owner information regardless of whether a thread
4772 			 * has changed since the last delta, whether it's a normal stackshot,
4773 			 * or whether it's nonrunnable
4774 			 */
4775 			if (save_owner_info) {
4776 				if (stackshot_thread_has_valid_waitinfo(thread)) {
4777 					num_waitinfo_threads++;
4778 				}
4779 
4780 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4781 					num_turnstileinfo_threads++;
4782 				}
4783 			}
4784 		}
4785 #if STACKSHOT_COLLECTS_LATENCY_INFO
4786 		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
4787 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4788 
4789 		thread_waitinfo_v2_t *thread_waitinfo           = NULL;
4790 		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
4791 		int current_waitinfo_index              = 0;
4792 		int current_turnstileinfo_index         = 0;
4793 		/* allocate space for the wait and turnstil info */
4794 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4795 			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
4796 			kcdata_compression_window_open(stackshot_kcdata_p);
4797 
4798 			if (num_waitinfo_threads > 0) {
4799 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
4800 				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
4801 				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
4802 			}
4803 
4804 			if (num_turnstileinfo_threads > 0) {
4805 				/* get space for the turnstile info */
4806 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
4807 				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
4808 				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
4809 			}
4810 
4811 			stackshot_plh_resetgen();  // so we know which portlabel_ids are referenced
4812 		}
4813 
4814 #if STACKSHOT_COLLECTS_LATENCY_INFO
4815 		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4816 		latency_info.task_thread_data_loop_latency = mach_absolute_time();
4817 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4818 
4819 		/* Iterate over the task's threads to save the wait and turnstile info */
4820 		queue_iterate(&task->threads, thread, thread_t, task_threads)
4821 		{
4822 			uint64_t thread_uniqueid;
4823 			#pragma unused(thread_uniqueid)
4824 
4825 			if (active_kthreads_only_p && thread->kernel_stack == 0) {
4826 				continue;
4827 			}
4828 
4829 			thread_uniqueid = thread_tid(thread);
4830 
4831 			/* If we want owner info, we should capture it regardless of its classification */
4832 			if (save_owner_info) {
4833 				if (stackshot_thread_has_valid_waitinfo(thread)) {
4834 					stackshot_thread_wait_owner_info(
4835 						thread,
4836 						&thread_waitinfo[current_waitinfo_index++]);
4837 				}
4838 
4839 				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4840 					stackshot_thread_turnstileinfo(
4841 						thread,
4842 						&thread_turnstileinfo[current_turnstileinfo_index++]);
4843 				}
4844 			}
4845 		}
4846 
4847 #if STACKSHOT_COLLECTS_LATENCY_INFO
4848 		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
4849 		latency_info.misc2_latency = mach_absolute_time();
4850 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4851 
4852 #if DEBUG || DEVELOPMENT
4853 		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
4854 			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4855 			    num_delta_thread_snapshots, current_delta_snapshot_index);
4856 		}
4857 		if (current_waitinfo_index != num_waitinfo_threads) {
4858 			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4859 			    num_waitinfo_threads, current_waitinfo_index);
4860 		}
4861 #endif
4862 
4863 		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4864 			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4865 			// now, record the portlabel hashes.
4866 			kcd_exit_on_error(kdp_stackshot_plh_record());
4867 		}
4868 
4869 #if IMPORTANCE_INHERITANCE
4870 		if (save_donating_pids_p) {
4871 			/* Ensure the buffer is big enough, since we're using the stack buffer for this. */
4872 			static_assert(TASK_IMP_WALK_LIMIT * sizeof(int32_t) <= MAX_FRAMES * sizeof(uintptr_t));
4873 			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
4874 			    (char*) stackshot_cpu_ctx.scc_stack_buffer, TASK_IMP_WALK_LIMIT);
4875 			if (saved_count > 0) {
4876 				/* Variable size array - better not have it on the stack. */
4877 				kcdata_compression_window_open(stackshot_kcdata_p);
4878 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
4879 				    sizeof(int32_t), saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4880 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4881 			}
4882 		}
4883 #endif
4884 
4885 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4886 		if (!stackshot_ctx.sc_panic_stackshot) {
4887 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
4888 			    "task_cpu_cycle_count"));
4889 		}
4890 #endif
4891 
4892 #if STACKSHOT_COLLECTS_LATENCY_INFO
4893 		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
4894 		if (collect_latency_info) {
4895 			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
4896 		}
4897 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4898 
4899 		/* mark end of task snapshot data */
4900 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
4901 		    task_uniqueid));
4902 	}
4903 
4904 
4905 error_exit:
4906 	return error;
4907 }
4908 
4909 /* Record global shared regions */
4910 static kern_return_t
kdp_stackshot_shared_regions(uint64_t trace_flags)4911 kdp_stackshot_shared_regions(uint64_t trace_flags)
4912 {
4913 	kern_return_t error        = KERN_SUCCESS;
4914 
4915 	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4916 	extern queue_head_t vm_shared_region_queue;
4917 	vm_shared_region_t sr;
4918 
4919 	extern queue_head_t vm_shared_region_queue;
4920 	queue_iterate(&vm_shared_region_queue,
4921 	    sr,
4922 	    vm_shared_region_t,
4923 	    sr_q) {
4924 		struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
4925 		if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
4926 			break;
4927 		}
4928 		if (collect_delta_stackshot && sr->sr_install_time < stackshot_args.since_timestamp) {
4929 			continue; // only include new shared caches in delta stackshots
4930 		}
4931 		uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
4932 		    (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
4933 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4934 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
4935 		kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
4936 		scinfo.sharedCacheSlide = sr->sr_slide;
4937 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
4938 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
4939 		scinfo.sharedCacheID = sr->sr_id;
4940 		scinfo.sharedCacheFlags = sharedCacheFlags;
4941 
4942 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
4943 		    sizeof(scinfo), &scinfo));
4944 
4945 		if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
4946 		    _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
4947 			assert(sr->sr_images_count != 0);
4948 			kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
4949 		}
4950 		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4951 		    STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
4952 	}
4953 
4954 	/*
4955 	 * For backwards compatibility; this will eventually be removed.
4956 	 * Another copy of the Primary System Shared Region, for older readers.
4957 	 */
4958 	sr = primary_system_shared_region;
4959 	/* record system level shared cache load info (if available) */
4960 	if (!collect_delta_stackshot && sr &&
4961 	    _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
4962 		struct dyld_shared_cache_loadinfo scinfo = {0};
4963 
4964 		/*
4965 		 * Historically, this data was in a dyld_uuid_info_64 structure, but the
4966 		 * naming of both the structure and fields for this use isn't great.  The
4967 		 * dyld_shared_cache_loadinfo structure has better names, but the same
4968 		 * layout and content as the original.
4969 		 *
4970 		 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
4971 		 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
4972 		 * entries; here, it's the slid base address, and we leave it that way
4973 		 * for backwards compatibility.
4974 		 */
4975 		kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
4976 		scinfo.sharedCacheSlide = sr->sr_slide;
4977 		scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
4978 		scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
4979 
4980 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
4981 		    sizeof(scinfo), &scinfo));
4982 
4983 		if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
4984 			/*
4985 			 * Include a map of the system shared cache layout if it has been populated
4986 			 * (which is only when the system is using a custom shared cache).
4987 			 */
4988 			if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
4989 			    (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
4990 				assert(sr->sr_images_count != 0);
4991 				kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
4992 			}
4993 		}
4994 	}
4995 
4996 error_exit:
4997 	return error;
4998 }
4999 
5000 static kern_return_t
kdp_stackshot_kcdata_format(void)5001 kdp_stackshot_kcdata_format(void)
5002 {
5003 	kern_return_t error        = KERN_SUCCESS;
5004 	mach_vm_address_t out_addr = 0;
5005 	uint64_t abs_time = 0;
5006 	uint64_t system_state_flags = 0;
5007 	task_t task = TASK_NULL;
5008 	mach_timebase_info_data_t timebase = {0, 0};
5009 	uint32_t length_to_copy = 0, tmp32 = 0;
5010 	abs_time = mach_absolute_time();
5011 	uint64_t last_task_start_time = 0;
5012 	int cur_workitem_index = 0;
5013 	uint64_t tasks_in_stackshot = 0;
5014 	uint64_t threads_in_stackshot = 0;
5015 
5016 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5017 	uint64_t stackshot_begin_cpu_cycle_count = 0;
5018 
5019 	if (!stackshot_ctx.sc_panic_stackshot) {
5020 		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5021 	}
5022 #endif
5023 
5024 	/* the CPU entering here is participating in the stackshot */
5025 	stackshot_cpu_ctx.scc_did_work = true;
5026 
5027 #if STACKSHOT_COLLECTS_LATENCY_INFO
5028 	collect_latency_info = stackshot_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
5029 #endif
5030 	/* process the flags */
5031 	bool collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5032 	bool collect_exclaves        = !disable_exclave_stackshot && ((stackshot_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
5033 	stackshot_ctx.sc_enable_faulting = (stackshot_flags & (STACKSHOT_ENABLE_BT_FAULTING));
5034 
5035 	/* Currently we only support returning explicit KEXT load info on fileset kernels */
5036 	kc_format_t primary_kc_type = KCFormatUnknown;
5037 	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
5038 		stackshot_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
5039 	}
5040 
5041 	if (sizeof(void *) == 8) {
5042 		system_state_flags |= kKernel64_p;
5043 	}
5044 
5045 #if CONFIG_EXCLAVES
5046 	if (!stackshot_ctx.sc_panic_stackshot && collect_exclaves) {
5047 		kcd_exit_on_error(stackshot_setup_exclave_waitlist()); /* Allocate list of exclave threads */
5048 	}
5049 #else
5050 #pragma unused(collect_exclaves)
5051 #endif /* CONFIG_EXCLAVES */
5052 
5053 	/* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
5054 	clock_timebase_info(&timebase);
5055 
5056 	/* begin saving data into the buffer */
5057 	if (stackshot_ctx.sc_bytes_uncompressed) {
5058 		stackshot_ctx.sc_bytes_uncompressed = 0;
5059 	}
5060 
5061 	/*
5062 	 * Setup pre-task linked kcdata buffer.
5063 	 * The idea here is that we want the kcdata to be in (roughly) the same order as it was
5064 	 * before we made this multithreaded, so we have separate buffers for pre and post task-iteration,
5065 	 * since that's the parallelized part.
5066 	 */
5067 	if (!stackshot_ctx.sc_is_singlethreaded) {
5068 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5069 		stackshot_ctx.sc_pretask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5070 	}
5071 
5072 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_flags, "stackshot_in_flags"));
5073 	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)stackshot_flags, "stackshot_in_pid"));
5074 	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
5075 	if (stackshot_flags & STACKSHOT_PAGE_TABLES) {
5076 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_args.pagetable_mask, "stackshot_pagetable_mask"));
5077 	}
5078 	if (stackshot_initial_estimate != 0) {
5079 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
5080 		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
5081 	}
5082 
5083 #if STACKSHOT_COLLECTS_LATENCY_INFO
5084 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time();
5085 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5086 
5087 #if CONFIG_JETSAM
5088 	tmp32 = memorystatus_get_pressure_status_kdp();
5089 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
5090 #endif
5091 
5092 	if (!collect_delta_stackshot) {
5093 		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
5094 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
5095 
5096 		tmp32 = PAGE_SIZE;
5097 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
5098 
5099 		/* save boot-args and osversion string */
5100 		length_to_copy =  MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
5101 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
5102 		length_to_copy = MIN((uint32_t)(strlen(osversion) + 1), OSVERSIZE);
5103 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OS_BUILD_VERSION, length_to_copy, (void *)osversion));
5104 
5105 
5106 		length_to_copy =  MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
5107 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
5108 
5109 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
5110 	} else {
5111 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stackshot_args.since_timestamp));
5112 	}
5113 
5114 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
5115 
5116 	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_ctx.sc_microsecs));
5117 
5118 	kcd_exit_on_error(kdp_stackshot_shared_regions(stackshot_flags));
5119 
5120 	/* Add requested information first */
5121 	if (stackshot_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
5122 		struct mem_and_io_snapshot mais = {0};
5123 		kdp_mem_and_io_snapshot(&mais);
5124 		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
5125 	}
5126 
5127 #if CONFIG_THREAD_GROUPS
5128 	struct thread_group_snapshot_v3 *thread_groups = NULL;
5129 	int num_thread_groups = 0;
5130 
5131 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5132 	uint64_t thread_group_begin_cpu_cycle_count = 0;
5133 
5134 	if (!stackshot_ctx.sc_is_singlethreaded && (stackshot_flags & STACKSHOT_THREAD_GROUP)) {
5135 		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5136 	}
5137 #endif
5138 
5139 	/* Iterate over thread group names */
5140 	if (stackshot_flags & STACKSHOT_THREAD_GROUP) {
5141 		/* Variable size array - better not have it on the stack. */
5142 		kcdata_compression_window_open(stackshot_kcdata_p);
5143 
5144 		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
5145 			stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5146 		}
5147 
5148 		if (num_thread_groups > 0) {
5149 			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
5150 			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
5151 		}
5152 
5153 		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
5154 			error = KERN_FAILURE;
5155 			goto error_exit;
5156 		}
5157 
5158 		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5159 	}
5160 
5161 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5162 	if (!stackshot_ctx.sc_panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
5163 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
5164 		    "thread_groups_cpu_cycle_count"));
5165 	}
5166 #endif
5167 #else
5168 	stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5169 #endif /* CONFIG_THREAD_GROUPS */
5170 
5171 
5172 #if STACKSHOT_COLLECTS_LATENCY_INFO
5173 	stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.setup_latency_mt;
5174 	if (stackshot_ctx.sc_is_singlethreaded) {
5175 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time();
5176 	} else {
5177 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time();
5178 	}
5179 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5180 
5181 	bool const process_scoped = (stackshot_args.pid != -1) &&
5182 	    ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
5183 
5184 	/* Iterate over tasks */
5185 	queue_iterate(&tasks, task, task_t, tasks)
5186 	{
5187 		stackshot_panic_guard();
5188 
5189 		if (collect_delta_stackshot) {
5190 			uint64_t abstime;
5191 			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
5192 
5193 			if (abstime > last_task_start_time) {
5194 				last_task_start_time = abstime;
5195 			}
5196 		}
5197 
5198 		pid_t task_pid = pid_from_task(task);
5199 
5200 		if (process_scoped && (task_pid != stackshot_args.pid)) {
5201 			continue;
5202 		}
5203 
5204 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5205 		    (!queue_empty(&task->threads) && task_pid != -1)) {
5206 			tasks_in_stackshot++;
5207 			threads_in_stackshot += task->thread_count;
5208 		}
5209 
5210 		/* If this is a singlethreaded stackshot, don't use the work queues. */
5211 		if (stackshot_ctx.sc_is_singlethreaded) {
5212 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5213 		} else {
5214 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5215 				.sswi_task = task,
5216 				.sswi_data = NULL,
5217 				.sswi_idx = cur_workitem_index++
5218 			}));
5219 		}
5220 
5221 		if (process_scoped) {
5222 			/* Only targeting one process, we're done now. */
5223 			break;
5224 		}
5225 	}
5226 
5227 #if STACKSHOT_COLLECTS_LATENCY_INFO
5228 	if (stackshot_ctx.sc_is_singlethreaded) {
5229 		stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_task_iteration_latency_mt;
5230 	} else {
5231 		stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.task_queue_building_latency_mt;
5232 	}
5233 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5234 
5235 	/* Setup post-task kcdata buffer */
5236 	if (!stackshot_ctx.sc_is_singlethreaded) {
5237 		stackshot_finalize_linked_kcdata();
5238 		kcd_exit_on_error(stackshot_new_linked_kcdata());
5239 		stackshot_ctx.sc_posttask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5240 	}
5241 
5242 #if CONFIG_COALITIONS
5243 	/* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
5244 	if (!collect_delta_stackshot || (last_task_start_time > stackshot_args.since_timestamp)) {
5245 		int num_coalitions = 0;
5246 		struct jetsam_coalition_snapshot *coalitions = NULL;
5247 
5248 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5249 		uint64_t coalition_begin_cpu_cycle_count = 0;
5250 
5251 		if (!stackshot_ctx.sc_panic_stackshot && (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
5252 			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5253 		}
5254 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5255 
5256 		/* Iterate over coalitions */
5257 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5258 			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5259 				stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5260 			}
5261 		}
5262 		if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5263 			if (num_coalitions > 0) {
5264 				/* Variable size array - better not have it on the stack. */
5265 				kcdata_compression_window_open(stackshot_kcdata_p);
5266 				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
5267 				coalitions = (struct jetsam_coalition_snapshot*)out_addr;
5268 
5269 				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5270 					error = KERN_FAILURE;
5271 					goto error_exit;
5272 				}
5273 
5274 				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5275 			}
5276 		}
5277 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5278 		if (!stackshot_ctx.sc_panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
5279 			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
5280 			    "coalitions_cpu_cycle_count"));
5281 		}
5282 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5283 	}
5284 #else
5285 	stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5286 #endif /* CONFIG_COALITIONS */
5287 
5288 	stackshot_panic_guard();
5289 
5290 #if STACKSHOT_COLLECTS_LATENCY_INFO
5291 	if (stackshot_ctx.sc_is_singlethreaded) {
5292 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time();
5293 	} else {
5294 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time();
5295 	}
5296 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5297 
5298 	/*
5299 	 * Iterate over the tasks in the terminated tasks list. We only inspect
5300 	 * tasks that have a valid bsd_info pointer. The check for task transition
5301 	 * like past P_LPEXIT during proc_exit() is now checked for inside the
5302 	 * kdp_stackshot_record_task(), and then a safer and minimal
5303 	 * transitioning_task_snapshot struct is collected via
5304 	 * kcdata_record_transitioning_task_snapshot()
5305 	 */
5306 	queue_iterate(&terminated_tasks, task, task_t, tasks)
5307 	{
5308 		stackshot_panic_guard();
5309 
5310 		if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5311 		    (!queue_empty(&task->threads) && pid_from_task(task) != -1)) {
5312 			tasks_in_stackshot++;
5313 			threads_in_stackshot += task->thread_count;
5314 		}
5315 
5316 		/* Only use workqueues on non-panic and non-scoped stackshots. */
5317 		if (stackshot_ctx.sc_is_singlethreaded) {
5318 			kcd_exit_on_error(kdp_stackshot_record_task(task));
5319 		} else {
5320 			kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5321 				.sswi_task = task,
5322 				.sswi_data = NULL,
5323 				.sswi_idx = cur_workitem_index++
5324 			}));
5325 		}
5326 	}
5327 
5328 	/* Mark the queue(s) as populated. */
5329 	for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5330 		os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_populated, true, release);
5331 	}
5332 
5333 #if DEVELOPMENT || DEBUG
5334 	kcd_exit_on_error(kdp_stackshot_plh_stats());
5335 #endif /* DEVELOPMENT || DEBUG */
5336 
5337 #if STACKSHOT_COLLECTS_LATENCY_INFO
5338 	if (stackshot_ctx.sc_is_singlethreaded) {
5339 		stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt;
5340 	} else {
5341 		stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt;
5342 	}
5343 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5344 
5345 #if STACKSHOT_COLLECTS_LATENCY_INFO
5346 	if (collect_latency_info) {
5347 		stackshot_ctx.sc_latency.latency_version = 2;
5348 		stackshot_ctx.sc_latency.main_cpu_number = stackshot_ctx.sc_main_cpuid;
5349 		stackshot_ctx.sc_latency.calling_cpu_number = stackshot_ctx.sc_calling_cpuid;
5350 	}
5351 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5352 
5353 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5354 	if (!stackshot_ctx.sc_panic_stackshot) {
5355 		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
5356 		    "stackshot_total_cpu_cycle_cnt"));
5357 	}
5358 #endif
5359 
5360 	kcdata_add_uint64_with_description(stackshot_kcdata_p, tasks_in_stackshot, "stackshot_tasks_count");
5361 	kcdata_add_uint64_with_description(stackshot_kcdata_p, threads_in_stackshot, "stackshot_threads_count");
5362 
5363 	stackshot_panic_guard();
5364 
5365 	if (!stackshot_ctx.sc_is_singlethreaded) {
5366 		/* Chip away at the queue. */
5367 		stackshot_finalize_linked_kcdata();
5368 		stackshot_cpu_do_work();
5369 		*stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_tail->kcdata;
5370 	}
5371 
5372 #if CONFIG_EXCLAVES
5373 	/* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
5374 	if (stackshot_ctx.sc_panic_stackshot) {
5375 		struct exclaves_panic_stackshot excl_ss;
5376 		kdp_read_panic_exclaves_stackshot(&excl_ss);
5377 
5378 		if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
5379 			tb_error_t tberr = TB_ERROR_SUCCESS;
5380 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
5381 
5382 			/* this block does not escape, so this is okay... */
5383 			kern_return_t *error_in_block = &error;
5384 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5385 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5386 			tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
5387 				*error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, true);
5388 			});
5389 			kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5390 			    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5391 			if (tberr != TB_ERROR_SUCCESS) {
5392 				exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
5393 			}
5394 		} else {
5395 			exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
5396 		}
5397 
5398 		/* check error from the block */
5399 		kcd_exit_on_error(error);
5400 	}
5401 #endif
5402 
5403 	/*  === END of populating stackshot data === */
5404 error_exit:;
5405 	if (error != KERN_SUCCESS) {
5406 		stackshot_set_error(error);
5407 	}
5408 
5409 	stackshot_panic_guard();
5410 
5411 	return error;
5412 }
5413 
5414 static uint64_t
proc_was_throttled_from_task(task_t task)5415 proc_was_throttled_from_task(task_t task)
5416 {
5417 	uint64_t was_throttled = 0;
5418 	void *bsd_info = get_bsdtask_info(task);
5419 
5420 	if (bsd_info) {
5421 		was_throttled = proc_was_throttled(bsd_info);
5422 	}
5423 
5424 	return was_throttled;
5425 }
5426 
5427 static uint64_t
proc_did_throttle_from_task(task_t task)5428 proc_did_throttle_from_task(task_t task)
5429 {
5430 	uint64_t did_throttle = 0;
5431 	void *bsd_info = get_bsdtask_info(task);
5432 
5433 	if (bsd_info) {
5434 		did_throttle = proc_did_throttle(bsd_info);
5435 	}
5436 
5437 	return did_throttle;
5438 }
5439 
5440 static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot * memio_snap)5441 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
5442 {
5443 	unsigned int pages_reclaimed;
5444 	unsigned int pages_wanted;
5445 	kern_return_t kErr;
5446 
5447 	uint64_t compressions = 0;
5448 	uint64_t decompressions = 0;
5449 
5450 	compressions = counter_load(&vm_statistics_compressions);
5451 	decompressions = counter_load(&vm_statistics_decompressions);
5452 
5453 	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
5454 	memio_snap->free_pages = vm_page_free_count;
5455 	memio_snap->active_pages = vm_page_active_count;
5456 	memio_snap->inactive_pages = vm_page_inactive_count;
5457 	memio_snap->purgeable_pages = vm_page_purgeable_count;
5458 	memio_snap->wired_pages = vm_page_wire_count;
5459 	memio_snap->speculative_pages = vm_page_speculative_count;
5460 	memio_snap->throttled_pages = vm_page_throttled_count;
5461 	memio_snap->busy_buffer_count = count_busy_buffers();
5462 	memio_snap->filebacked_pages = vm_page_pageable_external_count;
5463 	memio_snap->compressions = (uint32_t)compressions;
5464 	memio_snap->decompressions = (uint32_t)decompressions;
5465 	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
5466 	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
5467 
5468 	if (!kErr) {
5469 		memio_snap->pages_wanted = (uint32_t)pages_wanted;
5470 		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
5471 		memio_snap->pages_wanted_reclaimed_valid = 1;
5472 	} else {
5473 		memio_snap->pages_wanted = 0;
5474 		memio_snap->pages_reclaimed = 0;
5475 		memio_snap->pages_wanted_reclaimed_valid = 0;
5476 	}
5477 }
5478 
5479 static vm_offset_t
stackshot_find_phys(vm_map_t map,vm_offset_t target_addr,kdp_fault_flags_t fault_flags,uint32_t * kdp_fault_result_flags)5480 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
5481 {
5482 	vm_offset_t result;
5483 	struct kdp_fault_result fault_results = {0};
5484 	if (stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting) {
5485 		fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
5486 	}
5487 	if (!stackshot_ctx.sc_panic_stackshot) {
5488 		fault_flags |= KDP_FAULT_FLAGS_MULTICPU;
5489 	}
5490 
5491 	result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
5492 
5493 	if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
5494 		stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
5495 
5496 #if STACKSHOT_COLLECTS_LATENCY_INFO
5497 		stackshot_cpu_latency.faulting_time_mt += fault_results.time_spent_faulting;
5498 #endif
5499 
5500 		if ((stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting >= stackshot_max_fault_time) && !stackshot_ctx.sc_panic_stackshot) {
5501 			stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
5502 		}
5503 	}
5504 
5505 	if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
5506 		stackshot_cpu_ctx.scc_fault_stats.sfs_pages_faulted_in++;
5507 	}
5508 
5509 	if (kdp_fault_result_flags) {
5510 		*kdp_fault_result_flags = fault_results.flags;
5511 	}
5512 
5513 	return result;
5514 }
5515 
5516 /*
5517  * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
5518  * in order to:
5519  *   1. collect statistics on the number of pages faulted in
5520  *   2. stop faulting if the time spent faulting has exceeded the limit.
5521  */
5522 static boolean_t
stackshot_copyin(vm_map_t map,uint64_t uaddr,void * dest,size_t size,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5523 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5524 {
5525 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5526 	if (try_fault) {
5527 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5528 	}
5529 	return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5530 }
5531 static boolean_t
stackshot_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5532 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5533 {
5534 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5535 	if (try_fault) {
5536 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5537 	}
5538 	return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5539 }
5540 static int
stackshot_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5541 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5542 {
5543 	kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5544 	if (try_fault) {
5545 		fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5546 	}
5547 	return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
5548 }
5549 
5550 kern_return_t
do_stackshot(void * context)5551 do_stackshot(void *context)
5552 {
5553 #pragma unused(context)
5554 	kern_return_t error;
5555 	size_t queue_size;
5556 	uint64_t abs_time = mach_absolute_time(), abs_time_end = 0;
5557 	kdp_snapshot++;
5558 
5559 	_stackshot_validation_reset();
5560 	error = stackshot_plh_setup(); /* set up port label hash */
5561 
5562 	if (!stackshot_ctx.sc_is_singlethreaded) {
5563 		/* Set up queues. These numbers shouldn't change, but slightly fudge queue size just in case. */
5564 		queue_size = FUDGED_SIZE(tasks_count + terminated_tasks_count, 10);
5565 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5566 			stackshot_ctx.sc_workqueues[i] = (struct stackshot_workqueue) {
5567 				.sswq_items     = stackshot_alloc_arr(struct stackshot_workitem, queue_size, &error),
5568 				.sswq_capacity  = queue_size,
5569 				.sswq_num_items = 0,
5570 				.sswq_cur_item  = 0,
5571 				.sswq_populated = false
5572 			};
5573 			if (error != KERN_SUCCESS) {
5574 				break;
5575 			}
5576 		}
5577 	}
5578 
5579 	if (error != KERN_SUCCESS) {
5580 		stackshot_set_error(error);
5581 		return error;
5582 	}
5583 
5584 	/*
5585 	 * If no main CPU has been selected at this point, (since every CPU has
5586 	 * called stackshot_cpu_preflight by now), then there was no CLPC
5587 	 * recommended P-core available. In that case, we should volunteer ourself
5588 	 * to be the main CPU, because someone has to do it.
5589 	 */
5590 	if (stackshot_ctx.sc_main_cpuid == -1) {
5591 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, cpu_number(), acquire);
5592 		stackshot_cpu_ctx.scc_can_work = true;
5593 	}
5594 
5595 	/* After this, auxiliary CPUs can begin work. */
5596 	os_atomic_store(&stackshot_ctx.sc_state, SS_RUNNING, release);
5597 
5598 	/* If we are the main CPU, populate the queues / do other main CPU work. */
5599 	if (stackshot_ctx.sc_panic_stackshot || (stackshot_ctx.sc_main_cpuid == cpu_number())) {
5600 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5601 	} else if (stackshot_cpu_ctx.scc_can_work) {
5602 		stackshot_cpu_do_work();
5603 	}
5604 
5605 	/* Wait for every CPU to finish. */
5606 #if STACKSHOT_COLLECTS_LATENCY_INFO
5607 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time();
5608 #endif
5609 	if (stackshot_cpu_ctx.scc_can_work) {
5610 		os_atomic_dec(&stackshot_ctx.sc_cpus_working, seq_cst);
5611 		stackshot_cpu_ctx.scc_can_work = false;
5612 	}
5613 	while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
5614 		loop_wait();
5615 	}
5616 	stackshot_panic_guard();
5617 #if STACKSHOT_COLLECTS_LATENCY_INFO
5618 	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.cpu_wait_latency_mt;
5619 #endif
5620 
5621 	/* update timestamp of the stackshot */
5622 	abs_time_end = mach_absolute_time();
5623 	stackshot_ctx.sc_duration = (struct stackshot_duration_v2) {
5624 		.stackshot_duration       = (abs_time_end - abs_time),
5625 		.stackshot_duration_outer = 0,
5626 		.stackshot_duration_prior = stackshot_duration_prior_abs,
5627 	};
5628 
5629 	stackshot_plh_reset();
5630 
5631 	/* Check interrupts disabled time. */
5632 #if SCHED_HYGIENE_DEBUG
5633 	bool disable_interrupts_masked_check = kern_feature_override(
5634 		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
5635 	    (stackshot_flags & STACKSHOT_DO_COMPRESS) != 0;
5636 
5637 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
5638 	disable_interrupts_masked_check = true;
5639 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
5640 
5641 	if (disable_interrupts_masked_check) {
5642 		ml_spin_debug_clear_self();
5643 	}
5644 
5645 	if (!stackshot_ctx.sc_panic_stackshot && interrupt_masked_debug_mode) {
5646 		/*
5647 		 * Try to catch instances where stackshot takes too long BEFORE returning from
5648 		 * the debugger
5649 		 */
5650 		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
5651 	}
5652 #endif /* SCHED_HYGIENE_DEBUG */
5653 
5654 	kdp_snapshot--;
5655 
5656 	/* If any other CPU had an error, make sure we return it */
5657 	if (stackshot_ctx.sc_retval == KERN_SUCCESS) {
5658 		stackshot_ctx.sc_retval = stackshot_status_check();
5659 	}
5660 
5661 #if CONFIG_EXCLAVES
5662 	/* Avoid setting AST until as late as possible, in case the stackshot fails */
5663 	if (!stackshot_ctx.sc_panic_stackshot && stackshot_ctx.sc_retval == KERN_SUCCESS) {
5664 		commit_exclaves_ast();
5665 	}
5666 	if (stackshot_ctx.sc_retval != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
5667 		/* Clear inspection CTID list: no need to wait for these threads */
5668 		stackshot_exclave_inspect_ctid_count = 0;
5669 		stackshot_exclave_inspect_ctid_capacity = 0;
5670 		stackshot_exclave_inspect_ctids = NULL;
5671 	}
5672 #endif
5673 
5674 	/* If this is a singlethreaded stackshot, the "final" kcdata buffer is just our CPU's kcdata buffer */
5675 	if (stackshot_ctx.sc_is_singlethreaded) {
5676 		stackshot_ctx.sc_finalized_kcdata = stackshot_kcdata_p;
5677 	}
5678 
5679 	return stackshot_ctx.sc_retval;
5680 }
5681 
5682 kern_return_t
do_panic_stackshot(void * context)5683 do_panic_stackshot(void *context)
5684 {
5685 	kern_return_t ret = do_stackshot(context);
5686 	if (ret != KERN_SUCCESS) {
5687 		goto out;
5688 	}
5689 
5690 	ret = stackshot_finalize_singlethreaded_kcdata();
5691 
5692 out:
5693 	return ret;
5694 }
5695 
5696 /*
5697  * Set up needed state for this CPU before participating in a stackshot.
5698  * Namely, we want to signal that we're available to do work.
5699  * Called while interrupts are disabled & in the debugger trap.
5700  */
5701 void
stackshot_cpu_preflight(void)5702 stackshot_cpu_preflight(void)
5703 {
5704 	bool is_recommended, is_calling_cpu;
5705 	int my_cpu_no = cpu_number();
5706 
5707 #if STACKSHOT_COLLECTS_LATENCY_INFO
5708 	stackshot_cpu_latency = (typeof(stackshot_cpu_latency)) {
5709 		.cpu_number            =  cpu_number(),
5710 #if defined(__AMP__)
5711 		.cluster_type          =  current_cpu_datap()->cpu_cluster_type,
5712 #else /* __AMP__ */
5713 		.cluster_type = CLUSTER_TYPE_SMP,
5714 #endif /* __AMP__ */
5715 		.faulting_time_mt      = 0,
5716 		.total_buf             = 0,
5717 		.intercluster_buf_used = 0
5718 	};
5719 #if CONFIG_PERVASIVE_CPI
5720 	mt_cur_cpu_cycles_instrs_speculative(&stackshot_cpu_latency.total_cycles, &stackshot_cpu_latency.total_instrs);
5721 #endif /* CONFIG_PERVASIVE_CPI */
5722 	stackshot_cpu_latency.init_latency_mt = stackshot_cpu_latency.total_latency_mt = mach_absolute_time();
5723 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5724 
5725 	is_recommended = current_processor()->is_recommended;
5726 
5727 	/* If this is a recommended P-core (or SMP), try making it the main CPU */
5728 	if (is_recommended
5729 #if defined(__AMP__)
5730 	    && current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_P
5731 #endif /* __AMP__ */
5732 	    ) {
5733 		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, my_cpu_no, acquire);
5734 	}
5735 
5736 	is_calling_cpu = stackshot_ctx.sc_calling_cpuid == my_cpu_no;
5737 
5738 	stackshot_cpu_ctx.scc_did_work = false;
5739 	stackshot_cpu_ctx.scc_can_work = is_calling_cpu || (is_recommended && !stackshot_ctx.sc_is_singlethreaded);
5740 
5741 	if (stackshot_cpu_ctx.scc_can_work) {
5742 		os_atomic_inc(&stackshot_ctx.sc_cpus_working, relaxed);
5743 	}
5744 }
5745 
5746 __result_use_check
5747 static kern_return_t
stackshot_cpu_work_on_queue(struct stackshot_workqueue * queue)5748 stackshot_cpu_work_on_queue(struct stackshot_workqueue *queue)
5749 {
5750 	struct stackshot_workitem     *cur_workitemp;
5751 	kern_return_t                  error = KERN_SUCCESS;
5752 
5753 	while (((cur_workitemp = stackshot_get_workitem(queue)) != NULL || !os_atomic_load(&queue->sswq_populated, acquire))) {
5754 		/* Check to make sure someone hasn't errored out or panicked. */
5755 		if (__improbable(stackshot_status_check() != KERN_SUCCESS)) {
5756 			return KERN_ABORTED;
5757 		}
5758 
5759 		if (cur_workitemp) {
5760 			kcd_exit_on_error(stackshot_new_linked_kcdata());
5761 			cur_workitemp->sswi_data = stackshot_cpu_ctx.scc_kcdata_head;
5762 			kcd_exit_on_error(kdp_stackshot_record_task(cur_workitemp->sswi_task));
5763 			stackshot_finalize_linked_kcdata();
5764 		} else {
5765 #if STACKSHOT_COLLECTS_LATENCY_INFO
5766 			uint64_t time_begin = mach_absolute_time();
5767 #endif
5768 			loop_wait();
5769 #if STACKSHOT_COLLECTS_LATENCY_INFO
5770 			stackshot_cpu_latency.workqueue_latency_mt += mach_absolute_time() - time_begin;
5771 #endif
5772 		}
5773 	}
5774 
5775 error_exit:
5776 	return error;
5777 }
5778 
5779 static void
stackshot_cpu_do_work(void)5780 stackshot_cpu_do_work(void)
5781 {
5782 	kern_return_t                  error;
5783 
5784 	stackshot_cpu_ctx.scc_stack_buffer = stackshot_alloc_arr(uintptr_t, MAX_FRAMES, &error);
5785 	if (error != KERN_SUCCESS) {
5786 		goto error_exit;
5787 	}
5788 
5789 #if STACKSHOT_COLLECTS_LATENCY_INFO
5790 	stackshot_cpu_latency.init_latency_mt = mach_absolute_time() - stackshot_cpu_latency.init_latency_mt;
5791 #endif
5792 
5793 	bool high_perf = true;
5794 
5795 #if defined(__AMP__)
5796 	if (current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_E) {
5797 		high_perf = false;
5798 	}
5799 #endif /* __AMP__ */
5800 
5801 	if (high_perf) {
5802 		/* Non-E cores: Work from most difficult to least difficult */
5803 		for (size_t i = STACKSHOT_NUM_WORKQUEUES; i > 0; i--) {
5804 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i - 1]));
5805 		}
5806 	} else {
5807 		/* E: Work from least difficult to most difficult */
5808 		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5809 			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i]));
5810 		}
5811 	}
5812 #if STACKSHOT_COLLECTS_LATENCY_INFO
5813 	stackshot_cpu_latency.total_latency_mt = mach_absolute_time() - stackshot_cpu_latency.total_latency_mt;
5814 #if CONFIG_PERVASIVE_CPI
5815 	uint64_t cycles, instrs;
5816 	mt_cur_cpu_cycles_instrs_speculative(&cycles, &instrs);
5817 	stackshot_cpu_latency.total_cycles = cycles - stackshot_cpu_latency.total_cycles;
5818 	stackshot_cpu_latency.total_instrs = instrs - stackshot_cpu_latency.total_instrs;
5819 #endif /* CONFIG_PERVASIVE_CPI */
5820 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5821 
5822 error_exit:
5823 	if (error != KERN_SUCCESS) {
5824 		stackshot_set_error(error);
5825 	}
5826 	stackshot_panic_guard();
5827 }
5828 
5829 /*
5830  * This is where the other CPUs will end up when we take a stackshot.
5831  * If they're available to do work, they'll do so here.
5832  * Called with interrupts disabled & from the debugger trap.
5833  */
5834 void
stackshot_aux_cpu_entry(void)5835 stackshot_aux_cpu_entry(void)
5836 {
5837 	/*
5838 	 * This is where the other CPUs will end up when we take a stackshot.
5839 	 * Also, the main CPU will call this in the middle of its work to chip
5840 	 * away at the queue.
5841 	 */
5842 
5843 	/* Don't do work if we said we couldn't... */
5844 	if (!stackshot_cpu_ctx.scc_can_work) {
5845 		return;
5846 	}
5847 
5848 	/* Spin until we're ready to run. */
5849 	while (os_atomic_load(&stackshot_ctx.sc_state, acquire) == SS_SETUP) {
5850 		loop_wait();
5851 	}
5852 
5853 	/* Check to make sure the setup didn't error out or panic. */
5854 	if (stackshot_status_check() != KERN_SUCCESS) {
5855 		goto exit;
5856 	}
5857 
5858 	/* the CPU entering here is participating in the stackshot */
5859 	stackshot_cpu_ctx.scc_did_work = true;
5860 
5861 	if (stackshot_ctx.sc_main_cpuid == cpu_number()) {
5862 		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5863 	} else {
5864 		stackshot_cpu_do_work();
5865 	}
5866 
5867 exit:
5868 	os_atomic_dec(&stackshot_ctx.sc_cpus_working, release);
5869 }
5870 
5871 boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)5872 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
5873 {
5874 	/* When the pthread kext puts a worker thread to sleep, it will
5875 	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
5876 	 * struct. See parkit() in kern/kern_support.c in libpthread.
5877 	 */
5878 	return (thread->state & TH_WAIT) &&
5879 	       (thread->block_hint == kThreadWaitParkedWorkQueue);
5880 }
5881 
5882 #if CONFIG_COALITIONS
5883 static void
stackshot_coalition_jetsam_count(void * arg,int i,coalition_t coal)5884 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
5885 {
5886 #pragma unused(i, coal)
5887 	unsigned int *coalition_count = (unsigned int*)arg;
5888 	(*coalition_count)++;
5889 }
5890 
5891 static void
stackshot_coalition_jetsam_snapshot(void * arg,int i,coalition_t coal)5892 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
5893 {
5894 	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
5895 		return;
5896 	}
5897 
5898 	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
5899 	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
5900 	task_t leader = TASK_NULL;
5901 	jcs->jcs_id = coalition_id(coal);
5902 	jcs->jcs_flags = 0;
5903 	jcs->jcs_thread_group = 0;
5904 
5905 	if (coalition_term_requested(coal)) {
5906 		jcs->jcs_flags |= kCoalitionTermRequested;
5907 	}
5908 	if (coalition_is_terminated(coal)) {
5909 		jcs->jcs_flags |= kCoalitionTerminated;
5910 	}
5911 	if (coalition_is_reaped(coal)) {
5912 		jcs->jcs_flags |= kCoalitionReaped;
5913 	}
5914 	if (coalition_is_privileged(coal)) {
5915 		jcs->jcs_flags |= kCoalitionPrivileged;
5916 	}
5917 
5918 #if CONFIG_THREAD_GROUPS
5919 	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
5920 	if (thread_group) {
5921 		jcs->jcs_thread_group = thread_group_get_id(thread_group);
5922 	}
5923 #endif /* CONFIG_THREAD_GROUPS */
5924 
5925 	leader = kdp_coalition_get_leader(coal);
5926 	if (leader) {
5927 		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
5928 	} else {
5929 		jcs->jcs_leader_task_uniqueid = 0;
5930 	}
5931 }
5932 #endif /* CONFIG_COALITIONS */
5933 
5934 #if CONFIG_THREAD_GROUPS
/*
 * Iteration callback that only counts: bumps the unsigned counter that `arg`
 * points to once per invocation. The index and thread group are ignored.
 */
static void
stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
{
#pragma unused(i, tg)
	unsigned int *counter = (unsigned int *)arg;

	*counter += 1;
}
5942 
5943 static void
stackshot_thread_group_snapshot(void * arg,int i,struct thread_group * tg)5944 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
5945 {
5946 	struct thread_group_snapshot_v3 *thread_groups = arg;
5947 	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
5948 	const char *name = thread_group_get_name(tg);
5949 	uint32_t flags = thread_group_get_flags(tg);
5950 	tgs->tgs_id = thread_group_get_id(tg);
5951 	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
5952 	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
5953 	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
5954 	    sizeof(tgs->tgs_name_cont));
5955 	tgs->tgs_flags =
5956 	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT)     ? kThreadGroupEfficient     : 0) |
5957 	    ((flags & THREAD_GROUP_FLAGS_APPLICATION)   ? kThreadGroupApplication   : 0) |
5958 	    ((flags & THREAD_GROUP_FLAGS_CRITICAL)      ? kThreadGroupCritical      : 0) |
5959 	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT)   ? kThreadGroupBestEffort    : 0) |
5960 	    ((flags & THREAD_GROUP_FLAGS_UI_APP)        ? kThreadGroupUIApplication : 0) |
5961 	    ((flags & THREAD_GROUP_FLAGS_MANAGED)       ? kThreadGroupManaged       : 0) |
5962 	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers  : 0) |
5963 	    0;
5964 }
5965 #endif /* CONFIG_THREAD_GROUPS */
5966 
5967 /* Determine if a thread has waitinfo that stackshot can provide */
5968 static int
stackshot_thread_has_valid_waitinfo(thread_t thread)5969 stackshot_thread_has_valid_waitinfo(thread_t thread)
5970 {
5971 	if (!(thread->state & TH_WAIT)) {
5972 		return 0;
5973 	}
5974 
5975 	switch (thread->block_hint) {
5976 	// If set to None or is a parked work queue, ignore it
5977 	case kThreadWaitParkedWorkQueue:
5978 	case kThreadWaitNone:
5979 		return 0;
5980 	// There is a short window where the pthread kext removes a thread
5981 	// from its ksyn wait queue before waking the thread up
5982 	case kThreadWaitPThreadMutex:
5983 	case kThreadWaitPThreadRWLockRead:
5984 	case kThreadWaitPThreadRWLockWrite:
5985 	case kThreadWaitPThreadCondVar:
5986 		return kdp_pthread_get_thread_kwq(thread) != NULL;
5987 	// All other cases are valid block hints if in a wait state
5988 	default:
5989 		return 1;
5990 	}
5991 }
5992 
5993 /* Determine if a thread has turnstileinfo that stackshot can provide */
5994 static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)5995 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
5996 {
5997 	struct turnstile *ts = thread_get_waiting_turnstile(thread);
5998 
5999 	return stackshot_thread_has_valid_waitinfo(thread) &&
6000 	       ts != TURNSTILE_NULL;
6001 }
6002 
6003 static void
stackshot_thread_turnstileinfo(thread_t thread,thread_turnstileinfo_v2_t * tsinfo)6004 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
6005 {
6006 	struct turnstile *ts;
6007 	struct ipc_service_port_label *ispl = NULL;
6008 
6009 	/* acquire turnstile information and store it in the stackshot */
6010 	ts = thread_get_waiting_turnstile(thread);
6011 	tsinfo->waiter = thread_tid(thread);
6012 	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
6013 	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
6014 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
6015 	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
6016 	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
6017 }
6018 
6019 static void
stackshot_thread_wait_owner_info(thread_t thread,thread_waitinfo_v2_t * waitinfo)6020 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
6021 {
6022 	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
6023 	struct ipc_service_port_label *ispl = NULL;
6024 
6025 	waitinfo->waiter        = thread_tid(thread);
6026 	waitinfo->wait_type     = thread->block_hint;
6027 	waitinfo->wait_flags    = 0;
6028 
6029 	switch (waitinfo->wait_type) {
6030 	case kThreadWaitKernelMutex:
6031 		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6032 		break;
6033 	case kThreadWaitPortReceive:
6034 		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6035 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
6036 		break;
6037 	case kThreadWaitPortSend:
6038 		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6039 		waitinfo->portlabel_id  = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
6040 		break;
6041 	case kThreadWaitSemaphore:
6042 		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6043 		break;
6044 	case kThreadWaitUserLock:
6045 		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6046 		break;
6047 	case kThreadWaitKernelRWLockRead:
6048 	case kThreadWaitKernelRWLockWrite:
6049 	case kThreadWaitKernelRWLockUpgrade:
6050 		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6051 		break;
6052 	case kThreadWaitPThreadMutex:
6053 	case kThreadWaitPThreadRWLockRead:
6054 	case kThreadWaitPThreadRWLockWrite:
6055 	case kThreadWaitPThreadCondVar:
6056 		kdp_pthread_find_owner(thread, waitinfo_v1);
6057 		break;
6058 	case kThreadWaitWorkloopSyncWait:
6059 		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
6060 		break;
6061 	case kThreadWaitOnProcess:
6062 		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
6063 		break;
6064 	case kThreadWaitSleepWithInheritor:
6065 		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6066 		break;
6067 	case kThreadWaitEventlink:
6068 		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6069 		break;
6070 	case kThreadWaitCompressor:
6071 		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
6072 		break;
6073 	case kThreadWaitPageBusy:
6074 		kdp_vm_page_sleep_find_owner(thread->wait_event, waitinfo_v1);
6075 		break;
6076 	case kThreadWaitPagingInProgress:
6077 	case kThreadWaitPagingActivity:
6078 	case kThreadWaitPagerInit:
6079 	case kThreadWaitPagerReady:
6080 	case kThreadWaitMemoryBlocked:
6081 	case kThreadWaitPageInThrottle:
6082 		kdp_vm_object_sleep_find_owner(thread->wait_event, waitinfo->wait_type, waitinfo_v1);
6083 		break;
6084 	default:
6085 		waitinfo->owner = 0;
6086 		waitinfo->context = 0;
6087 		break;
6088 	}
6089 }
6090