1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <mach/mach_types.h>
31 #include <mach/vm_param.h>
32 #include <mach/mach_vm.h>
33 #include <mach/clock_types.h>
34 #include <sys/code_signing.h>
35 #include <sys/errno.h>
36 #include <sys/stackshot.h>
37 #if defined(__arm64__)
38 #include <arm/cpu_internal.h>
39 #endif /* __arm64__ */
40 #ifdef IMPORTANCE_INHERITANCE
41 #include <ipc/ipc_importance.h>
42 #endif
43 #include <sys/appleapiopts.h>
44 #include <kern/debug.h>
45 #include <kern/block_hint.h>
46 #include <uuid/uuid.h>
47
48 #include <kdp/kdp_dyld.h>
49 #include <kdp/kdp_en_debugger.h>
50 #include <kdp/processor_core.h>
51 #include <kdp/kdp_common.h>
52
53 #include <libsa/types.h>
54 #include <libkern/version.h>
55 #include <libkern/section_keywords.h>
56
57 #include <string.h> /* bcopy */
58
59 #include <kern/kern_stackshot.h>
60 #include <kern/kcdata_private.h>
61 #include <kern/backtrace.h>
62 #include <kern/coalition.h>
63 #include <kern/epoch_sync.h>
64 #include <kern/exclaves_stackshot.h>
65 #include <kern/exclaves_inspection.h>
66 #include <kern/processor.h>
67 #include <kern/host_statistics.h>
68 #include <kern/counter.h>
69 #include <kern/thread.h>
70 #include <kern/thread_group.h>
71 #include <kern/task.h>
72 #include <kern/telemetry.h>
73 #include <kern/clock.h>
74 #include <kern/policy_internal.h>
75 #include <kern/socd_client.h>
76 #include <kern/startup.h>
77 #include <vm/pmap.h>
78 #include <vm/vm_map_xnu.h>
79 #include <vm/vm_kern_xnu.h>
80 #include <vm/vm_pageout.h>
81 #include <vm/vm_fault.h>
82 #include <vm/vm_shared_region_xnu.h>
83 #include <vm/vm_compressor_xnu.h>
84 #include <libkern/OSKextLibPrivate.h>
85 #include <os/log.h>
86
87
88
89 #ifdef CONFIG_EXCLAVES
90 #include <kern/exclaves.tightbeam.h>
91 #endif /* CONFIG_EXCLAVES */
92
93 #include <kern/exclaves_test_stackshot.h>
94
95 #include <libkern/coreanalytics/coreanalytics.h>
96
97 #if defined(__x86_64__)
98 #include <i386/mp.h>
99 #include <i386/cpu_threads.h>
100 #endif
101
102 #include <pexpert/pexpert.h>
103
104 #if CONFIG_PERVASIVE_CPI
105 #include <kern/monotonic.h>
106 #endif /* CONFIG_PERVASIVE_CPI */
107
108 #include <san/kasan.h>
109
110 #if DEBUG || DEVELOPMENT
111 #define STACKSHOT_COLLECTS_DIAGNOSTICS 1
112 #define STACKSHOT_COLLECTS_LATENCY_INFO 1
113 #else
114 #define STACKSHOT_COLLECTS_DIAGNOSTICS 0
115 #define STACKSHOT_COLLECTS_LATENCY_INFO 0
116 #endif /* DEBUG || DEVELOPMENT */
117
118 #define STACKSHOT_COLLECTS_RDAR_126582377_DATA 0
119
120 #if defined(__AMP__)
121 #define STACKSHOT_NUM_WORKQUEUES 2
122 #else /* __AMP__ */
123 #define STACKSHOT_NUM_WORKQUEUES 1
124 #endif
125
126 #if defined(__arm64__)
127 #define STACKSHOT_NUM_BUFFERS MAX_CPU_CLUSTERS
128 #else /* __arm64__ */
129 #define STACKSHOT_NUM_BUFFERS 1
130 #endif /* __arm64__ */
131
132 /* The number of threads which will land a task in the hardest workqueue. */
133 #define STACKSHOT_HARDEST_THREADCOUNT 10
134
135 TUNABLE_DEV_WRITEABLE(unsigned int, stackshot_single_thread, "stackshot_single_thread", 0);
136
137 extern unsigned int not_in_kdp;
138
139 /* indicate to the compiler that some accesses are unaligned */
140 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
141
142 int kdp_snapshot = 0;
143
144 #pragma mark ---Stackshot Struct Definitions---
145
146 typedef struct linked_kcdata_descriptor {
147 struct kcdata_descriptor kcdata;
148 struct linked_kcdata_descriptor *next;
149 } * linked_kcdata_descriptor_t;
150
151 struct stackshot_workitem {
152 task_t sswi_task;
153 linked_kcdata_descriptor_t sswi_data; /* The kcdata for this task. */
154 int sswi_idx; /* The index of this job, used for ordering kcdata across multiple queues. */
155 };
156
157 struct stackshot_workqueue {
158 uint32_t _Atomic sswq_num_items; /* Only modified by main CPU */
159 uint32_t _Atomic sswq_cur_item; /* Modified by all CPUs */
160 size_t sswq_capacity; /* Constant after preflight */
161 bool _Atomic sswq_populated; /* Only modified by main CPU */
162 struct stackshot_workitem *__counted_by(capacity) sswq_items;
163 };
164
165 struct freelist_entry {
166 struct freelist_entry *fl_next; /* Next entry in the freelist */
167 size_t fl_size; /* Size of the entry (must be >= sizeof(struct freelist_entry)) */
168 };
169
170 struct stackshot_buffer {
171 void *ssb_ptr; /* Base of buffer */
172 size_t ssb_size;
173 size_t _Atomic ssb_used;
174 struct freelist_entry *ssb_freelist; /* First freelist entry */
175 int _Atomic ssb_freelist_lock;
176 size_t _Atomic ssb_overhead; /* Total amount ever freed (even if re-allocated from freelist) */
177 };
178
179 struct kdp_snapshot_args {
180 int pid;
181 void *buffer;
182 struct kcdata_descriptor *descriptor;
183 uint32_t buffer_size;
184 uint64_t flags;
185 uint64_t since_timestamp;
186 uint32_t pagetable_mask;
187 };
188
189 /*
190 * Keep a simple cache of the most recent validation done at a page granularity
191 * to avoid the expensive software KVA-to-phys translation in the VM.
192 */
193
194 struct _stackshot_validation_state {
195 vm_offset_t last_valid_page_kva;
196 size_t last_valid_size;
197 };
198
199 /* CPU-local generation counts for PLH */
200 struct _stackshot_plh_gen_state {
201 uint8_t *pgs_gen; /* last 'gen #' seen in */
202 int16_t pgs_curgen_min; /* min idx seen for this gen */
203 int16_t pgs_curgen_max; /* max idx seen for this gen */
204 uint8_t pgs_curgen; /* current gen */
205 };
206
207 /*
208 * For port labels, we have a small hash table we use to track the
209 * struct ipc_service_port_label pointers we see along the way.
210 * This structure encapsulates the global state.
211 *
212 * The hash table is insert-only, similar to "intern"ing strings. It's
213 * only used and manipulated during the stackshot collection. We use
214 * separate chaining, with the hash elements and chains being int16_ts
215 * indexes into the parallel arrays, with -1 ending the chain. Array indices are
216 * allocated using a bump allocator.
217 *
218 * The parallel arrays contain:
219 * - plh_array[idx] the pointer entered
220 * - plh_chains[idx] the hash chain
221 * - plh_gen[idx] the last 'generation #' seen
222 *
223 * Generation IDs are used to track entries looked up in the current
224 * task; 0 is never used, and the plh_gen array is cleared to 0 on
225 * rollover.
226 *
227 * The portlabel_ids we report externally are just the index in the array,
228 * plus 1 to avoid 0 as a value. 0 is NONE, -1 is UNKNOWN (e.g. there is
229 * one, but we ran out of space)
230 */
231 struct port_label_hash {
232 int _Atomic plh_lock; /* lock for concurrent modifications to this plh */
233 uint16_t plh_size; /* size of allocations; 0 disables tracking */
234 uint16_t plh_count; /* count of used entries in plh_array */
235 struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
236 int16_t *plh_chains; /* _size allocated */
237 int16_t *plh_hash; /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
238 #if DEVELOPMENT || DEBUG
239 /* statistics */
240 uint32_t _Atomic plh_lookups; /* # lookups or inserts */
241 uint32_t _Atomic plh_found;
242 uint32_t _Atomic plh_found_depth;
243 uint32_t _Atomic plh_insert;
244 uint32_t _Atomic plh_insert_depth;
245 uint32_t _Atomic plh_bad;
246 uint32_t _Atomic plh_bad_depth;
247 uint32_t _Atomic plh_lookup_send;
248 uint32_t _Atomic plh_lookup_receive;
249 #define PLH_STAT_OP(...) (void)(__VA_ARGS__)
250 #else /* DEVELOPMENT || DEBUG */
251 #define PLH_STAT_OP(...) (void)(0)
252 #endif /* DEVELOPMENT || DEBUG */
253 };
254
255 #define plh_lock(plh) while(!os_atomic_cmpxchg(&(plh)->plh_lock, 0, 1, acquire)) { loop_wait(); }
256 #define plh_unlock(plh) os_atomic_store(&(plh)->plh_lock, 0, release);
257
258 #define STACKSHOT_PLH_SHIFT 7
259 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
260 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
261 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
262
263 struct stackshot_cpu_context {
264 bool scc_can_work; /* Whether the CPU can do more stackshot work */
265 bool scc_did_work; /* Whether the CPU actually did any stackshot work */
266 linked_kcdata_descriptor_t scc_kcdata_head; /* See `linked_kcdata_alloc_callback */
267 linked_kcdata_descriptor_t scc_kcdata_tail; /* See `linked_kcdata_alloc_callback */
268 uintptr_t *scc_stack_buffer; /* A buffer for stacktraces. */
269 struct stackshot_fault_stats scc_fault_stats;
270 struct _stackshot_validation_state scc_validation_state;
271 struct _stackshot_plh_gen_state scc_plh_gen;
272 };
273
274 /*
275 * When directly modifying the stackshot state, always use the macros below to
276 * work with this enum - the higher order bits are used to store an error code
277 * in the case of SS_ERRORED.
278 *
279 * +------------------------------------+-------------------+
280 * | | |
281 * v | |
282 * +-------------+ +----------+ +------------+ +------------+
283 * | SS_INACTIVE |---->| SS_SETUP |---->| SS_RUNNING |---->| SS_ERRORED |
284 * +-------------+ +----------+ +------------+ +------------+
285 * | | | ^ |
286 * | +----------------|----------------+ |
287 * +-------------+ | | |
288 * | SS_PANICKED |<--------+-------------------+ |
289 * +-------------+ |
290 * ^ |
291 * | |
292 * +--------------------------------------------------------+
293 */
294 __enum_closed_decl(stackshot_state_t, uint, {
295 SS_INACTIVE = 0x0, /* -> SS_SETUP */
296 SS_SETUP = 0x1, /* -> SS_RUNNING, SS_ERRORED, SS_PANICKED */
297 SS_RUNNING = 0x2, /* -> SS_ERRORED, SS_PANICKED, SS_INACTIVE */
298 SS_ERRORED = 0x3, /* -> SS_INACTIVE, SS_PANICKED */
299 SS_PANICKED = 0x4, /* -> N/A */
300 _SS_COUNT
301 });
302
303 static_assert(_SS_COUNT <= 0x5);
304 /* Get the stackshot state ID from a stackshot_state_t. */
305 #define SS_STATE(state) ((state) & 0x7u)
306 /* Get the error code from a stackshot_state_t. */
307 #define SS_ERRCODE(state) ((state) >> 3)
308 /* Make a stackshot error state with a given code. */
309 #define SS_MKERR(code) (((code) << 3) | SS_ERRORED)
310
311 struct stackshot_context {
312 /* Constants & Arguments */
313 struct kdp_snapshot_args sc_args;
314 int sc_calling_cpuid;
315 int sc_main_cpuid;
316 bool sc_enable_faulting;
317 uint64_t sc_microsecs; /* Timestamp */
318 bool sc_panic_stackshot;
319 size_t sc_min_kcdata_size;
320 bool sc_is_singlethreaded;
321
322 /* State & Errors */
323 stackshot_state_t _Atomic sc_state; /* Only modified by calling CPU, main CPU, or panicking CPU. See comment above type definition for details. */
324 kern_return_t sc_retval; /* The return value of the main thread */
325 uint32_t _Atomic sc_cpus_working;
326
327 /* KCData */
328 linked_kcdata_descriptor_t sc_pretask_kcdata;
329 linked_kcdata_descriptor_t sc_posttask_kcdata;
330 kcdata_descriptor_t sc_finalized_kcdata;
331
332 /* Buffers & Queues */
333 struct stackshot_buffer __counted_by(num_buffers) sc_buffers[STACKSHOT_NUM_BUFFERS];
334 size_t sc_num_buffers;
335 struct stackshot_workqueue __counted_by(STACKSHOT_NUM_WORKQUEUES) sc_workqueues[STACKSHOT_NUM_WORKQUEUES];
336 struct port_label_hash sc_plh;
337
338 /* Statistics */
339 struct stackshot_duration_v2 sc_duration;
340 uint32_t sc_bytes_traced;
341 uint32_t sc_bytes_uncompressed;
342 #if STACKSHOT_COLLECTS_LATENCY_INFO
343 struct stackshot_latency_collection_v2 sc_latency;
344 #endif
345 };
346
347 #define STACKSHOT_DEBUG_TRACEBUF_SIZE 16
348
349 struct stackshot_trace_entry {
350 int sste_line_no;
351 uint64_t sste_timestamp;
352 mach_vm_address_t sste_data;
353 };
354
355 struct stackshot_trace_buffer {
356 uint64_t sstb_last_trace_timestamp;
357 size_t sstb_tail_idx;
358 size_t sstb_size;
359 struct stackshot_trace_entry __counted_by(STACKSHOT_DEBUG_TRACEBUF_SIZE) sstb_entries[STACKSHOT_DEBUG_TRACEBUF_SIZE];
360 };
361
362 #pragma mark ---Stackshot State and Data---
363
364 /*
365 * Two stackshot states, one for panic and one for normal.
366 * That way, we can take a stackshot during a panic without clobbering state.
367 */
368 #define STACKSHOT_CTX_IDX_NORMAL 0
369 #define STACKSHOT_CTX_IDX_PANIC 1
370 size_t cur_stackshot_ctx_idx = STACKSHOT_CTX_IDX_NORMAL;
371 struct stackshot_context stackshot_contexts[2] = {{0}, {0}};
372 #define stackshot_ctx (stackshot_contexts[cur_stackshot_ctx_idx])
373 #define stackshot_args (stackshot_ctx.sc_args)
374 #define stackshot_flags (stackshot_args.flags)
375
376 static struct {
377 uint64_t last_abs_start; /* start time of last stackshot */
378 uint64_t last_abs_end; /* end time of last stackshot */
379 uint64_t stackshots_taken; /* total stackshots taken since boot */
380 uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
381 } stackshot_stats = { 0 };
382
383 #if STACKSHOT_COLLECTS_LATENCY_INFO
384 static struct stackshot_latency_cpu PERCPU_DATA(stackshot_cpu_latency_percpu);
385 #define stackshot_cpu_latency (*PERCPU_GET(stackshot_cpu_latency_percpu))
386 #endif
387
388 static struct stackshot_cpu_context PERCPU_DATA(stackshot_cpu_ctx_percpu);
389 #define stackshot_cpu_ctx (*PERCPU_GET(stackshot_cpu_ctx_percpu))
390
391 static struct kcdata_descriptor PERCPU_DATA(stackshot_kcdata_percpu);
392 #define stackshot_kcdata_p (PERCPU_GET(stackshot_kcdata_percpu))
393
394 #if STACKSHOT_COLLECTS_LATENCY_INFO
395 static bool collect_latency_info = true;
396 #endif
397
398 static uint64_t stackshot_max_fault_time;
399
400 #if STACKSHOT_COLLECTS_DIAGNOSTICS
401 static struct stackshot_trace_buffer PERCPU_DATA(stackshot_trace_buffer);
402 #endif
403
404 #pragma mark ---Stackshot Global State---
405
406 uint32_t stackshot_estimate_adj = 25; /* experiment factor: 0-100, adjust our estimate up by this amount */
407
408 static uint32_t stackshot_initial_estimate;
409 static uint32_t stackshot_initial_estimate_adj;
410 static uint64_t stackshot_duration_prior_abs; /* prior attempts, abs */
411 static unaligned_u64 * stackshot_duration_outer;
412 static uint64_t stackshot_tries;
413
414 void * kernel_stackshot_buf = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
415 int kernel_stackshot_buf_size = 0;
416
417 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
418
419 #if CONFIG_EXCLAVES
420 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
421 static size_t stackshot_exclave_inspect_ctid_count = 0;
422 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
423
424 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
425 #endif /* CONFIG_EXCLAVES */
426
427 #if DEBUG || DEVELOPMENT
428 TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
429 #else
430 const bool disable_exclave_stackshot = false;
431 #endif
432
433 #pragma mark ---Stackshot Static Function Declarations---
434
435 __private_extern__ void stackshot_init( void );
436 static boolean_t memory_iszero(void *addr, size_t size);
437 static void stackshot_cpu_do_work(void);
438 static kern_return_t stackshot_finalize_kcdata(void);
439 static kern_return_t stackshot_finalize_singlethreaded_kcdata(void);
440 static kern_return_t stackshot_collect_kcdata(void);
441 static int kdp_stackshot_kcdata_format();
442 static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
443 static vm_offset_t stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
444 static boolean_t stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
445 static int stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
446 static boolean_t stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
447 static uint64_t proc_was_throttled_from_task(task_t task);
448 static void stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
449 static int stackshot_thread_has_valid_waitinfo(thread_t thread);
450 static void stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
451 static int stackshot_thread_has_valid_turnstileinfo(thread_t thread);
452 static uint32_t get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj, uint64_t trace_flags, pid_t target_pid);
453 static kern_return_t kdp_snapshot_preflight_internal(struct kdp_snapshot_args args);
454
455 #if CONFIG_COALITIONS
456 static void stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
457 static void stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
458 #endif /* CONFIG_COALITIONS */
459
460 #if CONFIG_THREAD_GROUPS
461 static void stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
462 static void stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
463 #endif /* CONFIG_THREAD_GROUPS */
464
465 extern uint64_t workqueue_get_task_ss_flags_from_pwq_state_kdp(void *proc);
466
467 static kcdata_descriptor_t linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size);
468
469 #pragma mark ---Stackshot Externs---
470
471 struct proc;
472 extern int proc_pid(struct proc *p);
473 extern uint64_t proc_uniqueid(void *p);
474 extern uint64_t proc_was_throttled(void *p);
475 extern uint64_t proc_did_throttle(void *p);
476 extern int proc_exiting(void *p);
477 extern int proc_in_teardown(void *p);
478 static uint64_t proc_did_throttle_from_task(task_t task);
479 extern void proc_name_kdp(struct proc *p, char * buf, int size);
480 extern int proc_threadname_kdp(void * uth, char * buf, size_t size);
481 extern void proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
482 extern void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
483 extern uint64_t proc_getcsflags_kdp(void * p);
484 extern boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
485 extern int memorystatus_get_pressure_status_kdp(void);
486 extern void memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
487 extern void panic_stackshot_release_lock(void);
488
489 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
490
491 #if CONFIG_TELEMETRY
492 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
493 #endif /* CONFIG_TELEMETRY */
494
495 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
496 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
497
498 static size_t stackshot_plh_est_size(void);
499
500 #if CONFIG_EXCLAVES
501 static kern_return_t collect_exclave_threads(uint64_t);
502 static kern_return_t stackshot_setup_exclave_waitlist(void);
503 #endif
504
505 /*
506 * Validates that the given address for a word is both a valid page and has
507 * default caching attributes for the current map.
508 */
509 bool machine_trace_thread_validate_kva(vm_offset_t);
510 /*
511 * Validates a region that stackshot will potentially inspect.
512 */
513 static bool _stackshot_validate_kva(vm_offset_t, size_t);
514 /*
515 * Must be called whenever stackshot is re-driven.
516 */
517 static void _stackshot_validation_reset(void);
518 /*
519 * A kdp-safe strlen() call. Returns:
520 * -1 if we reach maxlen or a bad address before the end of the string, or
521 * strlen(s)
522 */
523 static long _stackshot_strlen(const char *s, size_t maxlen);
524
525 #define MAX_FRAMES 1000
526 #define STACKSHOT_PAGETABLE_BUFSZ 4000
527 #define MAX_LOADINFOS 500
528 #define MAX_DYLD_COMPACTINFO (20 * 1024) // max bytes of compactinfo to include per proc/shared region
529 #define TASK_IMP_WALK_LIMIT 20
530
531 typedef struct thread_snapshot *thread_snapshot_t;
532 typedef struct task_snapshot *task_snapshot_t;
533
534 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
535 extern kdp_send_t kdp_en_send_pkt;
536 #endif
537
538 /*
539 * Stackshot locking and other defines.
540 */
541 LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
542 LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
543
544 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
545 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
546 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
547 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
548
549 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
550 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
551
552 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
553 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
554
555 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
556
557 /*
558 * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
559 * for non-panic stackshots where faulting is requested.
560 */
561 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
562
563
564 #ifndef ROUNDUP
565 #define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y))
566 #endif
567
568 #define STACKSHOT_QUEUE_LABEL_MAXSIZE 64
569
570 #pragma mark ---Stackshot Useful Macros---
571
572 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
573 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
/*
 * Evaluates `action`, stores its result in `error`, and jumps to `error_exit`
 * if the result is not KERN_SUCCESS. KERN_RESOURCE_SHORTAGE is reported as
 * KERN_INSUFFICIENT_BUFFER_SIZE.
 *
 * Use of the kcd_exit_on_error(action) macro requires a local
 * 'kern_return_t error' variable and 'error_exit' label.
 *
 * The expansion deliberately does not end in a semicolon: the caller supplies
 * it, which keeps the macro usable as the body of an unbraced if/else.
 */
#define kcd_exit_on_error(action)                              \
	do {                                                   \
	        if (KERN_SUCCESS != (error = (action))) {      \
	                STACKSHOT_TRACE(error);                \
	                if (error == KERN_RESOURCE_SHORTAGE) { \
	                        error = KERN_INSUFFICIENT_BUFFER_SIZE; \
	                }                                      \
	                goto error_exit;                       \
	        }                                              \
	} while (0) /* end kcd_exit_on_error */
588
589 #if defined(__arm64__)
590 #define loop_wait_noguard() __builtin_arm_wfe()
591 #elif defined(__x86_64__)
592 #define loop_wait_noguard() __builtin_ia32_pause()
593 #else
594 #define loop_wait_noguard()
595 #endif /* __x86_64__ */
596
597 #define loop_wait() { loop_wait_noguard(); stackshot_panic_guard(); }
598
599 static inline void stackshot_panic_guard(void);
600
601 static __attribute__((noreturn, noinline)) void
stackshot_panic_spin(void)602 stackshot_panic_spin(void)
603 {
604 if (stackshot_cpu_ctx.scc_can_work) {
605 stackshot_cpu_ctx.scc_can_work = false;
606 os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
607 }
608 if (stackshot_ctx.sc_calling_cpuid == cpu_number()) {
609 while (os_atomic_load(&stackshot_ctx.sc_cpus_working, acquire) != 0) {
610 loop_wait_noguard();
611 }
612 panic_stackshot_release_lock();
613 }
614 while (1) {
615 loop_wait_noguard();
616 }
617 }
618
619 /**
620 * Immediately aborts if another CPU panicked during the stackshot.
621 */
622 static inline void
stackshot_panic_guard(void)623 stackshot_panic_guard(void)
624 {
625 if (__improbable(os_atomic_load(&stackshot_ctx.sc_state, relaxed) == SS_PANICKED)) {
626 stackshot_panic_spin();
627 }
628 }
629
630 /*
631 * Signal that we panicked during a stackshot by setting an atomic flag and
632 * waiting for others to coalesce before continuing the panic. Other CPUs will
633 * spin on this as soon as they see it set in order to prevent multiple
634 * concurrent panics. The calling CPU (i.e. the one holding the debugger lock)
635 * will release it for us in `stackshot_panic_spin` so we can continue
636 * panicking.
637 *
638 * This is called from panic_trap_to_debugger.
639 */
640 void
stackshot_cpu_signal_panic(void)641 stackshot_cpu_signal_panic(void)
642 {
643 stackshot_state_t o_state;
644 if (stackshot_active()) {
645 /* Check if someone else panicked before we did. */
646 o_state = os_atomic_xchg(&stackshot_ctx.sc_state, SS_PANICKED, seq_cst);
647 if (o_state == SS_PANICKED) {
648 stackshot_panic_spin();
649 }
650
651 /* We're the first CPU to panic - wait for everyone to coalesce. */
652 if (stackshot_cpu_ctx.scc_can_work) {
653 stackshot_cpu_ctx.scc_can_work = false;
654 os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
655 }
656 while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
657 loop_wait_noguard();
658 }
659 }
660 }
661
662 /*
663 * Sets the stackshot state to SS_ERRORED along with the error code.
664 * Only works if the current state is SS_RUNNING or SS_SETUP.
665 */
666 static inline void
stackshot_set_error(kern_return_t error)667 stackshot_set_error(kern_return_t error)
668 {
669 stackshot_state_t cur_state;
670 stackshot_state_t err_state = SS_MKERR(error);
671 if (__improbable(!os_atomic_cmpxchgv(&stackshot_ctx.sc_state, SS_RUNNING, err_state, &cur_state, seq_cst))) {
672 if (cur_state == SS_SETUP) {
673 os_atomic_cmpxchg(&stackshot_ctx.sc_state, SS_SETUP, err_state, seq_cst);
674 } else {
675 /* Our state is something other than SS_RUNNING or SS_SETUP... Check for panic. */
676 stackshot_panic_guard();
677 }
678 }
679 }
680
681 /* Returns an error code if the current stackshot context has errored out.
682 * Also functions as a panic guard.
683 */
684 __result_use_check
685 static inline kern_return_t
stackshot_status_check(void)686 stackshot_status_check(void)
687 {
688 stackshot_state_t state = os_atomic_load(&stackshot_ctx.sc_state, relaxed);
689
690 /* Check for panic */
691 if (__improbable(SS_STATE(state) == SS_PANICKED)) {
692 stackshot_panic_spin();
693 }
694
695 /* Check for error */
696 if (__improbable(SS_STATE(state) == SS_ERRORED)) {
697 kern_return_t err = SS_ERRCODE(state);
698 assert(err != KERN_SUCCESS); /* SS_ERRORED should always store an associated error code. */
699 return err;
700 }
701
702 return KERN_SUCCESS;
703 }
704
705 #pragma mark ---Stackshot Tracing---
706
#if STACKSHOT_COLLECTS_DIAGNOSTICS
/*
 * Appends an entry (line number, mach_continuous_time() timestamp, data) to
 * this CPU's stackshot trace buffer. The buffer is a ring of
 * STACKSHOT_DEBUG_TRACEBUF_SIZE entries: the tail index wraps around and
 * sstb_size saturates at the ring capacity.
 */
static void
stackshot_trace(int line_no, mach_vm_address_t data)
{
	struct stackshot_trace_buffer *buffer = PERCPU_GET(stackshot_trace_buffer);
	buffer->sstb_entries[buffer->sstb_tail_idx] = (struct stackshot_trace_entry) {
		.sste_line_no = line_no,
		.sste_timestamp = mach_continuous_time(),
		.sste_data = data
	};
	buffer->sstb_tail_idx = (buffer->sstb_tail_idx + 1) % STACKSHOT_DEBUG_TRACEBUF_SIZE;
	buffer->sstb_size = MIN(buffer->sstb_size + 1, STACKSHOT_DEBUG_TRACEBUF_SIZE);
}
/* Records `data` together with the line number of the call site. */
#define STACKSHOT_TRACE(data) stackshot_trace(__LINE__, (mach_vm_address_t) (data))

#else /* STACKSHOT_COLLECTS_DIAGNOSTICS */
/* Tracing disabled: evaluate the (parenthesized) argument and discard it. */
#define STACKSHOT_TRACE(data) ((void) (data))
#endif /* !STACKSHOT_COLLECTS_DIAGNOSTICS */
725
726 #pragma mark ---Stackshot Buffer Management---
727
/*
 * Spin lock around a stackshot_buffer's freelist. `buffer` is a pointer to
 * the struct stackshot_buffer; it is parenthesized so that any pointer
 * expression may be passed. Waiting uses loop_wait().
 *
 * Note: freelist_unlock() expands with its own trailing semicolon, which is
 * preserved here for compatibility with existing call sites.
 */
#define freelist_lock(buffer) while(!os_atomic_cmpxchg(&(buffer)->ssb_freelist_lock, 0, 1, acquire)) { loop_wait(); }
#define freelist_unlock(buffer) os_atomic_store(&(buffer)->ssb_freelist_lock, 0, release);
730
731 /**
732 * Allocates some data from the shared stackshot buffer freelist.
733 * This should not be used directly, it is a last resort if we run out of space.
734 */
735 static void *
stackshot_freelist_alloc(size_t size,struct stackshot_buffer * buffer,kern_return_t * error)736 stackshot_freelist_alloc(
737 size_t size,
738 struct stackshot_buffer *buffer,
739 kern_return_t *error)
740 {
741 struct freelist_entry **cur_freelist, **best_freelist = NULL, *ret = NULL;
742
743 freelist_lock(buffer);
744
745 cur_freelist = &buffer->ssb_freelist;
746
747 while (*cur_freelist != NULL) {
748 if (((*cur_freelist)->fl_size >= size) && ((best_freelist == NULL) || ((*best_freelist)->fl_size > (*cur_freelist)->fl_size))) {
749 best_freelist = cur_freelist;
750 if ((*best_freelist)->fl_size == size) {
751 break;
752 }
753 }
754 cur_freelist = &((*cur_freelist)->fl_next);
755 }
756
757 /* If we found a freelist entry, update the freelist */
758 if (best_freelist != NULL) {
759 os_atomic_sub(&buffer->ssb_overhead, size, relaxed);
760 ret = *best_freelist;
761
762 /* If there's enough unused space at the end of this entry, we should make a new one */
763 if (((*best_freelist)->fl_size - size) > sizeof(struct freelist_entry)) {
764 struct freelist_entry *new_freelist = (struct freelist_entry*) ((mach_vm_address_t) *best_freelist + size);
765 *new_freelist = (struct freelist_entry) {
766 .fl_next = (*best_freelist)->fl_next,
767 .fl_size = (*best_freelist)->fl_size - size
768 };
769 (*best_freelist)->fl_next = new_freelist;
770 }
771
772 /* Update previous entry with next or new entry */
773 *best_freelist = (*best_freelist)->fl_next;
774 }
775
776 freelist_unlock(buffer);
777
778 if (error != NULL) {
779 if (ret == NULL) {
780 *error = KERN_INSUFFICIENT_BUFFER_SIZE;
781 } else {
782 *error = KERN_SUCCESS;
783 }
784 }
785
786 return ret;
787 }
788
789 /**
790 * Allocates some data from the shared stackshot buffer.
791 * Should not be used directly - see the `stackshot_alloc` and
792 * `stackshot_alloc_arr` macros.
793 */
794 static void *
stackshot_buffer_alloc(size_t size,struct stackshot_buffer * buffer,kern_return_t * error)795 stackshot_buffer_alloc(
796 size_t size,
797 struct stackshot_buffer *buffer,
798 kern_return_t *error)
799 {
800 size_t o_used, new_used;
801
802 stackshot_panic_guard();
803 assert(!stackshot_ctx.sc_is_singlethreaded);
804
805 os_atomic_rmw_loop(&buffer->ssb_used, o_used, new_used, relaxed, {
806 new_used = o_used + size;
807 if (new_used > buffer->ssb_size) {
808 os_atomic_rmw_loop_give_up(return stackshot_freelist_alloc(size, buffer, error));
809 }
810 });
811
812 if (error != NULL) {
813 *error = KERN_SUCCESS;
814 }
815
816 return (void*) ((mach_vm_address_t) buffer->ssb_ptr + o_used);
817 }
818
819 /**
820 * Finds the best stackshot buffer to use (prefer our cluster's buffer)
821 * and allocates from it.
822 * Should not be used directly - see the `stackshot_alloc` and
823 * `stackshot_alloc_arr` macros.
824 */
825 __result_use_check
826 static void *
stackshot_best_buffer_alloc(size_t size,kern_return_t * error)827 stackshot_best_buffer_alloc(size_t size, kern_return_t *error)
828 {
829 #if defined(__AMP__)
830 kern_return_t err;
831 int my_cluster;
832 void *ret = NULL;
833 #endif /* __AMP__ */
834
835 #if STACKSHOT_COLLECTS_LATENCY_INFO
836 stackshot_cpu_latency.total_buf += size;
837 #endif
838
839 #if defined(__AMP__)
840 /* First, try our cluster's buffer */
841 my_cluster = cpu_cluster_id();
842 ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[my_cluster], &err);
843
844 /* Try other buffers now. */
845 if (err != KERN_SUCCESS) {
846 for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
847 if (buf_idx == my_cluster) {
848 continue;
849 }
850
851 ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[buf_idx], &err);
852 if (err == KERN_SUCCESS) {
853 #if STACKSHOT_COLLECTS_LATENCY_INFO
854 stackshot_cpu_latency.intercluster_buf_used += size;
855 #endif
856 break;
857 }
858 }
859 }
860
861 if (error != NULL) {
862 *error = err;
863 }
864
865 return ret;
866 #else /* __AMP__ */
867 return stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[0], error);
868 #endif /* !__AMP__ */
869 }
870
871 /**
872 * Frees some data from the shared stackshot buffer and adds it to the freelist.
873 */
874 static void
stackshot_buffer_free(void * ptr,struct stackshot_buffer * buffer,size_t size)875 stackshot_buffer_free(
876 void *ptr,
877 struct stackshot_buffer *buffer,
878 size_t size)
879 {
880 stackshot_panic_guard();
881
882 /* This should never be called during a singlethreaded stackshot. */
883 assert(!stackshot_ctx.sc_is_singlethreaded);
884
885 os_atomic_add(&buffer->ssb_overhead, size, relaxed);
886
887 /* Make sure we have enough space for the freelist entry */
888 if (size < sizeof(struct freelist_entry)) {
889 return;
890 }
891
892 freelist_lock(buffer);
893
894 /* Create new freelist entry and push it to the front of the list */
895 *((struct freelist_entry*) ptr) = (struct freelist_entry) {
896 .fl_size = size,
897 .fl_next = buffer->ssb_freelist
898 };
899 buffer->ssb_freelist = ptr;
900
901 freelist_unlock(buffer);
902 }
903
904 /**
905 * Allocates some data from the stackshot buffer. Uses the bump allocator in
906 * multithreaded mode and endalloc in singlethreaded.
907 * err must ALWAYS be nonnull.
908 * Should not be used directly - see the macros in kern_stackshot.h.
909 */
910 void *
stackshot_alloc_with_size(size_t size,kern_return_t * err)911 stackshot_alloc_with_size(size_t size, kern_return_t *err)
912 {
913 void *ptr;
914 assert(err != NULL);
915 assert(stackshot_active());
916
917 stackshot_panic_guard();
918
919 if (stackshot_ctx.sc_is_singlethreaded) {
920 ptr = kcdata_endalloc(stackshot_kcdata_p, size);
921 if (ptr == NULL) {
922 *err = KERN_INSUFFICIENT_BUFFER_SIZE;
923 }
924 } else {
925 ptr = stackshot_best_buffer_alloc(size, err);
926 if (ptr == NULL) {
927 /* We should always return an error if we return a null ptr */
928 assert3u(*err, !=, KERN_SUCCESS);
929 }
930 }
931
932 return ptr;
933 }
934
935 /**
936 * Initializes a new kcdata buffer somewhere in a linked kcdata list.
937 * Allocates a buffer for the kcdata from the shared stackshot buffer.
938 *
939 * See `linked_kcdata_alloc_callback` for the implementation details of
940 * linked kcdata for stackshot.
941 */
942 __result_use_check
943 static kern_return_t
linked_kcdata_init(linked_kcdata_descriptor_t descriptor,size_t min_size,unsigned int data_type,unsigned int flags)944 linked_kcdata_init(
945 linked_kcdata_descriptor_t descriptor,
946 size_t min_size,
947 unsigned int data_type,
948 unsigned int flags)
949 {
950 void *buf_ptr;
951 kern_return_t error;
952 size_t buf_size = MAX(min_size, stackshot_ctx.sc_min_kcdata_size);
953
954 buf_ptr = stackshot_alloc_arr(uint8_t, buf_size, &error);
955 if (error != KERN_SUCCESS) {
956 return error;
957 }
958
959 error = kcdata_memory_static_init(&descriptor->kcdata, (mach_vm_address_t) buf_ptr, data_type, buf_size, flags);
960 if (error != KERN_SUCCESS) {
961 return error;
962 }
963
964 descriptor->kcdata.kcd_alloc_callback = linked_kcdata_alloc_callback;
965
966 return KERN_SUCCESS;
967 }
968
969 static void
stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)970 stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)
971 {
972 /*
973 * If we have free space at the end of the kcdata, we can add it to the
974 * freelist. We always add to *our* cluster's freelist, no matter where
975 * the data was originally allocated.
976 *
977 * Important Note: We do not use kcdata_memory_get_used_bytes here because
978 * that includes extra space for the end tag (which we do not care about).
979 */
980 int buffer;
981 size_t used_size = descriptor->kcd_addr_end - descriptor->kcd_addr_begin;
982 size_t free_size = (descriptor->kcd_length - used_size);
983 if (free_size > 0) {
984 #if defined(__arm64__)
985 buffer = cpu_cluster_id();
986 #else /* __arm64__ */
987 buffer = 0;
988 #endif /* !__arm64__ */
989 stackshot_buffer_free((void*) descriptor->kcd_addr_end, &stackshot_ctx.sc_buffers[buffer], free_size);
990 descriptor->kcd_length = used_size;
991 }
992 }
993
994 /**
995 * The callback for linked kcdata, which is called when one of the kcdata
996 * buffers runs out of space. This allocates a new kcdata descriptor &
997 * buffer in the linked list and sets it up.
998 *
999 * When kcdata calls this callback, it takes the returned descriptor
1000 * and copies it to its own descriptor (which will be the per-cpu kcdata
1001 * descriptor, in the case of stackshot).
1002 *
1003 * --- Stackshot linked kcdata details ---
1004 * The way stackshot allocates kcdata buffers (in a non-panic context) is via
1005 * a basic bump allocator (see `stackshot_buffer_alloc`) and a linked list of
1006 * kcdata structures. The kcdata are allocated with a reasonable size based on
1007 * some system heuristics (or more if whatever is being pushed into the buffer
1008 * is larger). When the current kcdata buffer runs out of space, it calls this
1009 * callback, which allocates a new linked kcdata object at the tail of the
1010 * current list.
1011 *
1012 * The per-cpu `stackshot_kcdata_p` descriptor is the "tail" of the list, but
1013 * is not actually part of the linked list (this simplified implementation,
1014 * since it didn't require changing every kcdata call & a bunch of
1015 * kcdata code, since the current in-use descriptor is always in the same place
1016 * this way). When it is filled up and this callback is called, the
1017 * `stackshot_kcdata_p` descriptor is copied to the *actual* tail of the list
1018 * (in stackshot_cpu_ctx.scc_kcdata_tail), and a new linked kcdata struct is
1019 * allocated at the tail.
1020 */
1021 static kcdata_descriptor_t
linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor,size_t min_size)1022 linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size)
1023 {
1024 kern_return_t error;
1025 linked_kcdata_descriptor_t new_kcdata = NULL;
1026
1027 /* This callback should ALWAYS be coming from our per-cpu kcdata. If not, something has gone horribly wrong.*/
1028 stackshot_panic_guard();
1029 assert(descriptor == stackshot_kcdata_p);
1030
1031 /* Free the unused space in the buffer and copy it to the tail of the linked kcdata list. */
1032 stackshot_kcdata_free_unused(descriptor);
1033 stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *descriptor;
1034
1035 /* Allocate another linked_kcdata and initialize it. */
1036 new_kcdata = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1037 if (error != KERN_SUCCESS) {
1038 return NULL;
1039 }
1040
1041 /* It doesn't matter what we mark the data type as - we're throwing it away when weave the data together anyway. */
1042 error = linked_kcdata_init(new_kcdata, min_size, KCDATA_BUFFER_BEGIN_STACKSHOT, descriptor->kcd_flags);
1043 if (error != KERN_SUCCESS) {
1044 return NULL;
1045 }
1046
1047 bzero(descriptor, sizeof(struct kcdata_descriptor));
1048 stackshot_cpu_ctx.scc_kcdata_tail->next = new_kcdata;
1049 stackshot_cpu_ctx.scc_kcdata_tail = new_kcdata;
1050
1051 return &new_kcdata->kcdata;
1052 }
1053
1054 /**
1055 * Allocates a new linked kcdata list for the current CPU and sets it up.
1056 * If there was a previous linked kcdata descriptor, you should call
1057 * `stackshot_finalize_linked_kcdata` first, or otherwise save it somewhere.
1058 */
1059 __result_use_check
1060 static kern_return_t
stackshot_new_linked_kcdata(void)1061 stackshot_new_linked_kcdata(void)
1062 {
1063 kern_return_t error;
1064
1065 stackshot_panic_guard();
1066 assert(!stackshot_ctx.sc_panic_stackshot);
1067
1068 stackshot_cpu_ctx.scc_kcdata_head = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1069 if (error != KERN_SUCCESS) {
1070 return error;
1071 }
1072
1073 kcd_exit_on_error(linked_kcdata_init(stackshot_cpu_ctx.scc_kcdata_head, 0,
1074 KCDATA_BUFFER_BEGIN_STACKSHOT,
1075 KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER | KCFLAG_ALLOC_CALLBACK));
1076
1077 stackshot_cpu_ctx.scc_kcdata_tail = stackshot_cpu_ctx.scc_kcdata_head;
1078 *stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_head->kcdata;
1079
1080 error_exit:
1081 return error;
1082 }
1083
1084 /**
1085 * Finalizes the current linked kcdata structure for the CPU by updating the
1086 * tail of the list with the per-cpu kcdata descriptor.
1087 */
1088 static void
stackshot_finalize_linked_kcdata(void)1089 stackshot_finalize_linked_kcdata(void)
1090 {
1091 stackshot_panic_guard();
1092 assert(!stackshot_ctx.sc_panic_stackshot);
1093 stackshot_kcdata_free_unused(stackshot_kcdata_p);
1094 if (stackshot_cpu_ctx.scc_kcdata_tail != NULL) {
1095 stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *stackshot_kcdata_p;
1096 }
1097 *stackshot_kcdata_p = (struct kcdata_descriptor){};
1098 }
1099
1100 /*
1101 * Initialize the mutex governing access to the stack snapshot subsystem
1102 * and other stackshot related bits.
1103 */
1104 __private_extern__ void
stackshot_init(void)1105 stackshot_init(void)
1106 {
1107 mach_timebase_info_data_t timebase;
1108
1109 clock_timebase_info(&timebase);
1110 stackshot_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
1111
1112 max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
1113
1114 PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
1115 }
1116
1117 /*
1118 * Called with interrupts disabled after stackshot context has been
1119 * initialized.
1120 */
1121 static kern_return_t
stackshot_trap(void)1122 stackshot_trap(void)
1123 {
1124 kern_return_t rv;
1125
1126 #if defined(__x86_64__)
1127 /*
1128 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
1129 * operation, it's essential to apply mutual exclusion to the other when one
1130 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
1131 * capture CPUs.
1132 *
1133 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
1134 * allowed, so we check to ensure there there is no rendezvous in progress before
1135 * trying to grab the lock (if there is, a deadlock will occur when we try to
1136 * grab the lock). This is accomplished by setting cpu_rendezvous_in_progress to
1137 * TRUE in the mp rendezvous action function. If stackshot_trap() is called by
1138 * a subordinate of the call chain within the mp rendezvous action, this flag will
1139 * be set and can be used to detect the inevitable deadlock that would occur
1140 * if this thread tried to grab the rendezvous lock.
1141 */
1142
1143 if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
1144 panic("Calling stackshot from a rendezvous is not allowed!");
1145 }
1146
1147 mp_rendezvous_lock();
1148 #endif
1149
1150 stackshot_stats.last_abs_start = mach_absolute_time();
1151 stackshot_stats.last_abs_end = 0;
1152
1153 rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0, NULL);
1154
1155 stackshot_stats.last_abs_end = mach_absolute_time();
1156 stackshot_stats.stackshots_taken++;
1157 stackshot_stats.stackshots_duration += (stackshot_stats.last_abs_end - stackshot_stats.last_abs_start);
1158
1159 #if defined(__x86_64__)
1160 mp_rendezvous_unlock();
1161 #endif
1162 return rv;
1163 }
1164
1165 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
1166 void
stackshot_get_timing(uint64_t * last_abs_start,uint64_t * last_abs_end,uint64_t * count,uint64_t * total_duration)1167 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
1168 {
1169 STACKSHOT_SUBSYS_LOCK();
1170 *last_abs_start = stackshot_stats.last_abs_start;
1171 *last_abs_end = stackshot_stats.last_abs_end;
1172 *count = stackshot_stats.stackshots_taken;
1173 *total_duration = stackshot_stats.stackshots_duration;
1174 STACKSHOT_SUBSYS_UNLOCK();
1175 }
1176
1177 kern_return_t
stack_snapshot_from_kernel(int pid,void * buf,uint32_t size,uint64_t flags,uint64_t delta_since_timestamp,uint32_t pagetable_mask,unsigned * bytes_traced)1178 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
1179 {
1180 kern_return_t error = KERN_SUCCESS;
1181 boolean_t istate;
1182 struct kdp_snapshot_args args;
1183
1184 args = (struct kdp_snapshot_args) {
1185 .pid = pid,
1186 .buffer = buf,
1187 .buffer_size = size,
1188 .flags = flags,
1189 .since_timestamp = delta_since_timestamp,
1190 .pagetable_mask = pagetable_mask
1191 };
1192
1193 #if DEVELOPMENT || DEBUG
1194 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1195 return KERN_NOT_SUPPORTED;
1196 }
1197 #endif
1198 if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
1199 return KERN_INVALID_ARGUMENT;
1200 }
1201
1202 /* zero caller's buffer to match KMA_ZERO in other path */
1203 bzero(buf, size);
1204
1205 /* cap in individual stackshot to max_tracebuf_size */
1206 if (size > max_tracebuf_size) {
1207 size = max_tracebuf_size;
1208 }
1209
1210 /* Serialize tracing */
1211 if (flags & STACKSHOT_TRYLOCK) {
1212 if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
1213 return KERN_LOCK_OWNED;
1214 }
1215 } else {
1216 STACKSHOT_SUBSYS_LOCK();
1217 }
1218
1219 #if CONFIG_EXCLAVES
1220 assert(!stackshot_exclave_inspect_ctids);
1221 #endif
1222
1223 stackshot_initial_estimate = 0;
1224 stackshot_duration_prior_abs = 0;
1225 stackshot_duration_outer = NULL;
1226
1227 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
1228 flags, size, pid, delta_since_timestamp);
1229
1230 /* Prepare the compressor for a stackshot */
1231 error = vm_compressor_kdp_init();
1232 if (error != KERN_SUCCESS) {
1233 return error;
1234 }
1235
1236 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
1237 // Opportunistically collect reports of the rdar://126582377 failure.
1238 // If the allocation doesn't succeed, or if another CPU "steals" the
1239 // allocated event first, that is acceptable.
1240 ca_event_t new_event = CA_EVENT_ALLOCATE_FLAGS(bad_stackshot_upper16, Z_NOWAIT);
1241 if (new_event) {
1242 if (os_atomic_cmpxchg(&rdar_126582377_event, NULL, new_event, relaxed) == 0) {
1243 // Already set up, so free it
1244 CA_EVENT_DEALLOCATE(new_event);
1245 }
1246 }
1247 #endif
1248
1249 istate = ml_set_interrupts_enabled(FALSE);
1250 uint64_t time_start = mach_absolute_time();
1251
1252 /* Emit a SOCD tracepoint that we are initiating a stackshot */
1253 SOCD_TRACE_XNU_START(STACKSHOT);
1254
1255 /* Preload trace parameters*/
1256 error = kdp_snapshot_preflight_internal(args);
1257
1258 /*
1259 * Trap to the debugger to obtain a coherent stack snapshot; this populates
1260 * the trace buffer
1261 */
1262 if (error == KERN_SUCCESS) {
1263 error = stackshot_trap();
1264 }
1265
1266 uint64_t time_end = mach_absolute_time();
1267
1268 /* Emit a SOCD tracepoint that we have completed the stackshot */
1269 SOCD_TRACE_XNU_END(STACKSHOT);
1270
1271 ml_set_interrupts_enabled(istate);
1272
1273 #if CONFIG_EXCLAVES
1274 /* stackshot trap should only finish successfully or with no pending Exclave threads */
1275 assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1276 #endif
1277
1278 /*
1279 * Stackshot is no longer active.
1280 * (We have to do this here for the special interrupt disable timeout case to work)
1281 */
1282 os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
1283
1284 /* Release kdp compressor buffers */
1285 vm_compressor_kdp_teardown();
1286
1287 /* Collect multithreaded kcdata into one finalized buffer */
1288 if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
1289 error = stackshot_collect_kcdata();
1290 }
1291
1292 #if CONFIG_EXCLAVES
1293 if (error == KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
1294 error = collect_exclave_threads(flags);
1295 }
1296 #endif /* CONFIG_EXCLAVES */
1297
1298 if (error == KERN_SUCCESS) {
1299 if (!stackshot_ctx.sc_is_singlethreaded) {
1300 error = stackshot_finalize_kcdata();
1301 } else {
1302 error = stackshot_finalize_singlethreaded_kcdata();
1303 }
1304 }
1305
1306 if (stackshot_duration_outer) {
1307 *stackshot_duration_outer = time_end - time_start;
1308 }
1309 *bytes_traced = kdp_stack_snapshot_bytes_traced();
1310
1311 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
1312 error, (time_end - time_start), size, *bytes_traced);
1313
1314 STACKSHOT_SUBSYS_UNLOCK();
1315 return error;
1316 }
1317
#if CONFIG_TELEMETRY
/*
 * Copies microstackshot telemetry out to a user buffer.
 *
 * tracebuf       user address to copy to; must not be USER_ADDR_NULL
 * tracebuf_size  size of that buffer; must be non-zero
 * flags          exactly one request bit, optionally combined with
 *                STACKSHOT_SET_MICROSTACKSHOT_MARK:
 *                  STACKSHOT_GET_KERNEL_MICROSTACKSHOT - gather kernel
 *                    samples; the mark bit is mandatory
 *                  STACKSHOT_GET_MICROSTACKSHOT - gather samples; rejected
 *                    when tracebuf_size exceeds max_tracebuf_size
 * retval         receives the byte count reported by the gather routine
 *                (written only when a gather routine was actually called)
 *
 * Returns KERN_INVALID_ARGUMENT for malformed requests, KERN_NOT_SUPPORTED
 * for any other single flag, otherwise the gather routine's result.
 */
kern_return_t
stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
{
	int result = KERN_FAILURE;
	uint32_t nbytes = 0;

	/*
	 * Apart from the mark bit, "flags" behaves like an enumeration: strip the
	 * mark and insist that exactly one request bit is left. There must also
	 * be somewhere to copy the data out to.
	 */
	bool want_mark = (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) != 0;
	flags &= ~STACKSHOT_SET_MICROSTACKSHOT_MARK;

	if (__builtin_popcount(flags) != 1 ||
	    tracebuf == USER_ADDR_NULL || tracebuf_size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	STACKSHOT_SUBSYS_LOCK();

	switch (flags) {
	case STACKSHOT_GET_MICROSTACKSHOT:
		if (tracebuf_size <= max_tracebuf_size) {
			nbytes = tracebuf_size;
			result = telemetry_gather(tracebuf, &nbytes, want_mark);
			*retval = (int)nbytes;
		} else {
			result = KERN_INVALID_ARGUMENT;
		}
		break;

	case STACKSHOT_GET_KERNEL_MICROSTACKSHOT:
		/*
		 * Reading kernel samples consumes them from their buffer, so the
		 * request is only honoured together with a mark.
		 */
		if (want_mark) {
			nbytes = tracebuf_size;
			result = telemetry_kernel_gather(tracebuf, &nbytes);
			*retval = (int)nbytes;
		} else {
			result = KERN_INVALID_ARGUMENT;
		}
		break;

	default:
		result = KERN_NOT_SUPPORTED;
		break;
	}

	STACKSHOT_SUBSYS_UNLOCK();
	return result;
}
#endif /* CONFIG_TELEMETRY */
1378
1379 /**
1380 * Grabs the next work item from the stackshot work queue.
1381 */
1382 static struct stackshot_workitem *
stackshot_get_workitem(struct stackshot_workqueue * queue)1383 stackshot_get_workitem(struct stackshot_workqueue *queue)
1384 {
1385 uint32_t old_count, new_count;
1386
1387 /* note: this relies on give_up not performing the write, just bailing out immediately */
1388 os_atomic_rmw_loop(&queue->sswq_cur_item, old_count, new_count, acq_rel, {
1389 if (old_count >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1390 os_atomic_rmw_loop_give_up(return NULL);
1391 }
1392 new_count = old_count + 1;
1393 });
1394
1395 return &queue->sswq_items[old_count];
1396 };
1397
1398 /**
1399 * Puts an item on the appropriate stackshot work queue.
1400 * We don't need the lock for this, but only because it's
1401 * only called by one writer..
1402 *
1403 * @returns
1404 * true if the item fit in the queue, false if not.
1405 */
1406 static kern_return_t
stackshot_put_workitem(struct stackshot_workitem item)1407 stackshot_put_workitem(struct stackshot_workitem item)
1408 {
1409 struct stackshot_workqueue *queue;
1410
1411 /* Put in higher queue if task has more threads, with highest queue having >= STACKSHOT_HARDEST_THREADCOUNT threads */
1412 size_t queue_idx = ((item.sswi_task->thread_count * (STACKSHOT_NUM_WORKQUEUES - 1)) / STACKSHOT_HARDEST_THREADCOUNT);
1413 queue_idx = MIN(queue_idx, STACKSHOT_NUM_WORKQUEUES - 1);
1414
1415 queue = &stackshot_ctx.sc_workqueues[queue_idx];
1416
1417 size_t num_items = os_atomic_load(&queue->sswq_num_items, relaxed);
1418
1419 if (num_items >= queue->sswq_capacity) {
1420 return KERN_INSUFFICIENT_BUFFER_SIZE;
1421 }
1422
1423 queue->sswq_items[num_items] = item;
1424 os_atomic_inc(&queue->sswq_num_items, release);
1425
1426 return KERN_SUCCESS;
1427 }
1428
/*
 * Number of kcdata buffers of `kcdata_size` bytes needed to hold `size`
 * bytes, i.e. size / kcdata_size rounded up. `size` must be at least 1 and
 * `kcdata_size` non-zero.
 */
#define calc_num_linked_kcdata_frames(size, kcdata_size) (1 + ((size) - 1) / (kcdata_size))
/*
 * Total bytes consumed by that many linked kcdata buffers, counting one
 * struct linked_kcdata_descriptor per buffer on top of the buffer itself.
 */
#define calc_linked_kcdata_size(size, kcdata_size) (calc_num_linked_kcdata_frames((size), (kcdata_size)) * ((kcdata_size) + sizeof(struct linked_kcdata_descriptor)))

#define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
#define TASK_SHARED_CACHE_AVG_SIZE (128) /* Average space consumed by task shared cache info */
/*
 * sizeof(a) if `flag` is set in the variable `trace_flags` (which must be in
 * scope at the point of use), 0 otherwise. `a` is handed to sizeof as-is, so
 * it must be a type or an object: passing a size *expression* yields the size
 * of that expression's type, not its value.
 */
#define sizeof_if_traceflag(a, flag) (((trace_flags & (flag)) != 0) ? sizeof(a) : 0)

/* `size` increased by `adj` percent; the arithmetic is done in the operands' own type. */
#define FUDGED_SIZE(size, adj) (((size) * ((adj) + 100)) / 100)
1437
1438 /*
1439 * Return the estimated size of a single task (including threads)
1440 * in a stackshot with the given flags.
1441 */
1442 static uint32_t
get_stackshot_est_tasksize(uint64_t trace_flags)1443 get_stackshot_est_tasksize(uint64_t trace_flags)
1444 {
1445 size_t total_size;
1446 size_t threads_per_task = (((threads_count + terminated_threads_count) - 1) / (tasks_count + terminated_tasks_count)) + 1;
1447 size_t est_thread_size = sizeof(struct thread_snapshot_v4) + 42 * sizeof(uintptr_t);
1448 size_t est_task_size = sizeof(struct task_snapshot_v2) +
1449 TASK_UUID_AVG_SIZE +
1450 TASK_SHARED_CACHE_AVG_SIZE +
1451 sizeof_if_traceflag(struct io_stats_snapshot, STACKSHOT_INSTRS_CYCLES) +
1452 sizeof_if_traceflag(uint32_t, STACKSHOT_ASID) +
1453 sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ, STACKSHOT_PAGE_TABLES) +
1454 sizeof_if_traceflag(struct instrs_cycles_snapshot_v2, STACKSHOT_INSTRS_CYCLES) +
1455 sizeof(struct stackshot_cpu_architecture) +
1456 sizeof(struct stackshot_task_codesigning_info);
1457
1458 #if STACKSHOT_COLLECTS_LATENCY_INFO
1459 if (collect_latency_info) {
1460 est_thread_size += sizeof(struct stackshot_latency_thread);
1461 est_task_size += sizeof(struct stackshot_latency_task);
1462 }
1463 #endif
1464
1465 total_size = est_task_size + threads_per_task * est_thread_size;
1466
1467 return total_size;
1468 }
1469
1470 /*
1471 * Return the estimated size of a stackshot based on the
1472 * number of currently running threads and tasks.
1473 *
1474 * adj is an adjustment in units of percentage
1475 */
1476 static uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj,uint64_t trace_flags,pid_t target_pid)1477 get_stackshot_estsize(
1478 uint32_t prev_size_hint,
1479 uint32_t adj,
1480 uint64_t trace_flags,
1481 pid_t target_pid)
1482 {
1483 vm_size_t thread_and_task_total;
1484 uint64_t size;
1485 uint32_t estimated_size;
1486 bool process_scoped = ((target_pid != -1) && ((trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0));
1487
1488 /*
1489 * We use the estimated task size (with a fudge factor) as the default
1490 * linked kcdata buffer size in an effort to reduce overhead (ideally, we want
1491 * each task to only need a single kcdata buffer.)
1492 */
1493 uint32_t est_task_size = get_stackshot_est_tasksize(trace_flags);
1494 uint32_t est_kcdata_size = FUDGED_SIZE(est_task_size, adj);
1495 uint64_t est_preamble_size = calc_linked_kcdata_size(8192 * 4, est_kcdata_size);
1496 uint64_t est_postamble_size = calc_linked_kcdata_size(8192 * 2, est_kcdata_size);
1497 uint64_t est_extra_size = 0;
1498
1499 adj = MIN(adj, 100u); /* no more than double our estimate */
1500
1501 #if STACKSHOT_COLLECTS_LATENCY_INFO
1502 est_extra_size += real_ncpus * sizeof(struct stackshot_latency_cpu);
1503 est_extra_size += sizeof(struct stackshot_latency_collection_v2);
1504 #endif
1505
1506 est_extra_size += real_ncpus * MAX_FRAMES * sizeof(uintptr_t); /* Stacktrace buffers */
1507 est_extra_size += FUDGED_SIZE(tasks_count, 10) * sizeof(uintptr_t) * STACKSHOT_NUM_WORKQUEUES; /* Work queues */
1508 est_extra_size += sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ * real_ncpus, STACKSHOT_PAGE_TABLES);
1509
1510 thread_and_task_total = calc_linked_kcdata_size(est_task_size, est_kcdata_size);
1511 if (!process_scoped) {
1512 thread_and_task_total *= tasks_count;
1513 }
1514 size = thread_and_task_total + est_preamble_size + est_postamble_size + est_extra_size; /* estimate */
1515 size = FUDGED_SIZE(size, adj); /* add adj */
1516 size = MAX(size, prev_size_hint); /* allow hint to increase */
1517 size += stackshot_plh_est_size(); /* add space for the port label hash */
1518 size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
1519 estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
1520
1521 return estimated_size;
1522 }
1523
1524 /**
1525 * Copies a linked list of kcdata structures into a final kcdata structure.
1526 * Only used from stackshot_finalize_kcdata.
1527 */
1528 __result_use_check
1529 static kern_return_t
stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata,linked_kcdata_descriptor_t linked_kcdata)1530 stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata, linked_kcdata_descriptor_t linked_kcdata)
1531 {
1532 kern_return_t error = KERN_SUCCESS;
1533
1534 while (linked_kcdata) {
1535 /* Walk linked kcdata list */
1536 kcdata_descriptor_t cur_kcdata = &linked_kcdata->kcdata;
1537 if ((cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin) == 0) {
1538 linked_kcdata = linked_kcdata->next;
1539 continue;
1540 }
1541
1542 /* Every item in the linked kcdata should have a header tag of type KCDATA_BUFFER_BEGIN_STACKSHOT. */
1543 assert(((struct kcdata_item*) cur_kcdata->kcd_addr_begin)->type == KCDATA_BUFFER_BEGIN_STACKSHOT);
1544 assert((final_kcdata->kcd_addr_begin + final_kcdata->kcd_length) > final_kcdata->kcd_addr_end);
1545 size_t header_size = sizeof(kcdata_item_t) + kcdata_calc_padding(sizeof(kcdata_item_t));
1546 size_t size = cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin - header_size;
1547 size_t free = (final_kcdata->kcd_length + final_kcdata->kcd_addr_begin) - final_kcdata->kcd_addr_end;
1548 if (free < size) {
1549 error = KERN_INSUFFICIENT_BUFFER_SIZE;
1550 goto error_exit;
1551 }
1552
1553 /* Just memcpy the data over (and compress if we need to.) */
1554 kcdata_compression_window_open(final_kcdata);
1555 error = kcdata_memcpy(final_kcdata, final_kcdata->kcd_addr_end, (void*) (cur_kcdata->kcd_addr_begin + header_size), size);
1556 if (error != KERN_SUCCESS) {
1557 goto error_exit;
1558 }
1559 final_kcdata->kcd_addr_end += size;
1560 kcdata_compression_window_close(final_kcdata);
1561
1562 linked_kcdata = linked_kcdata->next;
1563 }
1564
1565 error_exit:
1566 return error;
1567 }
1568
1569 /**
1570 * Copies the duration, latency, and diagnostic info into a final kcdata buffer.
1571 * Only used by stackshot_finalize_kcdata and stackshot_finalize_singlethreaded_kcdata.
1572 */
1573 __result_use_check
1574 static kern_return_t
stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)1575 stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)
1576 {
1577 kern_return_t error;
1578 mach_vm_address_t out_addr;
1579 bool use_fault_path = ((stackshot_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
1580 #if STACKSHOT_COLLECTS_LATENCY_INFO
1581 size_t buffer_used = 0;
1582 size_t buffer_overhead = 0;
1583 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1584
1585 if (use_fault_path) {
1586 struct stackshot_fault_stats stats = (struct stackshot_fault_stats) {
1587 .sfs_pages_faulted_in = 0,
1588 .sfs_time_spent_faulting = 0,
1589 .sfs_system_max_fault_time = stackshot_max_fault_time,
1590 .sfs_stopped_faulting = false
1591 };
1592 percpu_foreach_base(base) {
1593 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
1594 if (!cpu_ctx->scc_did_work) {
1595 continue;
1596 }
1597 stats.sfs_pages_faulted_in += cpu_ctx->scc_fault_stats.sfs_pages_faulted_in;
1598 stats.sfs_time_spent_faulting += cpu_ctx->scc_fault_stats.sfs_time_spent_faulting;
1599 stats.sfs_stopped_faulting = stats.sfs_stopped_faulting || cpu_ctx->scc_fault_stats.sfs_stopped_faulting;
1600 }
1601 kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
1602 sizeof(struct stackshot_fault_stats), &stats);
1603 }
1604
1605 #if STACKSHOT_COLLECTS_LATENCY_INFO
1606 int num_working_cpus = 0;
1607 if (collect_latency_info) {
1608 /* Add per-CPU latency info */
1609 percpu_foreach(cpu_ctx, stackshot_cpu_ctx_percpu) {
1610 if (cpu_ctx->scc_did_work) {
1611 num_working_cpus++;
1612 }
1613 }
1614 kcdata_compression_window_open(kcdata);
1615 kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1616 kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_CPU, sizeof(struct stackshot_latency_cpu), num_working_cpus, &out_addr));
1617 percpu_foreach_base(base) {
1618 if (PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu)->scc_did_work) {
1619 kcdata_memcpy(kcdata, out_addr, PERCPU_GET_WITH_BASE(base, stackshot_cpu_latency_percpu),
1620 sizeof(struct stackshot_latency_cpu));
1621 out_addr += sizeof(struct stackshot_latency_cpu);
1622 }
1623 }
1624 kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1625
1626 /* Add up buffer info */
1627 for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
1628 struct stackshot_buffer *buf = &stackshot_ctx.sc_buffers[buf_idx];
1629 buffer_used += os_atomic_load(&buf->ssb_used, relaxed);
1630 buffer_overhead += os_atomic_load(&buf->ssb_overhead, relaxed);
1631 }
1632 stackshot_ctx.sc_latency.buffer_size = stackshot_ctx.sc_args.buffer_size;
1633 stackshot_ctx.sc_latency.buffer_overhead = buffer_overhead;
1634 stackshot_ctx.sc_latency.buffer_used = buffer_used;
1635 stackshot_ctx.sc_latency.buffer_count = stackshot_ctx.sc_num_buffers;
1636
1637 /* Add overall latency info */
1638 kcd_exit_on_error(kcdata_push_data(
1639 kcdata, STACKSHOT_KCTYPE_LATENCY_INFO,
1640 sizeof(stackshot_ctx.sc_latency), &stackshot_ctx.sc_latency));
1641 }
1642 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1643
1644 if ((stackshot_flags & STACKSHOT_DO_COMPRESS) == 0) {
1645 assert(!stackshot_ctx.sc_panic_stackshot);
1646 kcd_exit_on_error(kcdata_get_memory_addr(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
1647 sizeof(struct stackshot_duration_v2), &out_addr));
1648 struct stackshot_duration_v2 *duration_p = (void *) out_addr;
1649 memcpy(duration_p, &stackshot_ctx.sc_duration, sizeof(*duration_p));
1650 stackshot_duration_outer = (unaligned_u64 *) &duration_p->stackshot_duration_outer;
1651 kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_tries, "stackshot_tries"));
1652 } else {
1653 kcd_exit_on_error(kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_ctx.sc_duration), &stackshot_ctx.sc_duration));
1654 stackshot_duration_outer = NULL;
1655 }
1656
1657 error_exit:
1658 return error;
1659 }
1660
1661 /**
1662 * Allocates the final kcdata buffer for a mulitithreaded stackshot,
1663 * where all of the per-task kcdata (and exclave kcdata) will end up.
1664 */
1665 __result_use_check
1666 static kern_return_t
stackshot_alloc_final_kcdata(void)1667 stackshot_alloc_final_kcdata(void)
1668 {
1669 vm_offset_t final_kcdata_buffer = 0;
1670 kern_return_t error = KERN_SUCCESS;
1671 uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1672 : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1673 : KCDATA_BUFFER_BEGIN_STACKSHOT;
1674
1675 if (stackshot_ctx.sc_is_singlethreaded) {
1676 return KERN_SUCCESS;
1677 }
1678
1679 if ((error = kmem_alloc(kernel_map, &final_kcdata_buffer, stackshot_args.buffer_size,
1680 KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
1681 os_log_error(OS_LOG_DEFAULT, "stackshot: final allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, stackshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
1682 return KERN_RESOURCE_SHORTAGE;
1683 }
1684
1685 stackshot_ctx.sc_finalized_kcdata = kcdata_memory_alloc_init(final_kcdata_buffer, hdr_tag,
1686 stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1687
1688 if (stackshot_ctx.sc_finalized_kcdata == NULL) {
1689 kmem_free(kernel_map, final_kcdata_buffer, stackshot_args.buffer_size);
1690 return KERN_FAILURE;
1691 }
1692
1693 return KERN_SUCCESS;
1694 }
1695
1696 /**
1697 * Frees the final kcdata buffer.
1698 */
1699 static void
stackshot_free_final_kcdata(void)1700 stackshot_free_final_kcdata(void)
1701 {
1702 if (stackshot_ctx.sc_is_singlethreaded || (stackshot_ctx.sc_finalized_kcdata == NULL)) {
1703 return;
1704 }
1705
1706 kmem_free(kernel_map, stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1707 kcdata_memory_destroy(stackshot_ctx.sc_finalized_kcdata);
1708 stackshot_ctx.sc_finalized_kcdata = NULL;
1709 }
1710
1711 /**
1712 * Called once we exit the debugger trap to collate all of the separate linked
1713 * kcdata lists into one kcdata buffer. The calling thread will run this, and
1714 * it is guaranteed that nobody else is touching any stackshot state at this
1715 * point. In the case of a panic stackshot, this is never called since we only
1716 * use one thread.
1717 *
1718 * Called with interrupts enabled, stackshot subsys lock held.
1719 */
1720 __result_use_check
1721 static kern_return_t
stackshot_collect_kcdata(void)1722 stackshot_collect_kcdata(void)
1723 {
1724 kern_return_t error = 0;
1725 uint32_t hdr_tag;
1726
1727 assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1728 LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1729
1730 /* Allocate our final kcdata buffer. */
1731 kcd_exit_on_error(stackshot_alloc_final_kcdata());
1732 assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1733
1734 /* Setup compression if we need it. */
1735 if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
1736 hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1737 : KCDATA_BUFFER_BEGIN_STACKSHOT;
1738 kcd_exit_on_error(kcdata_init_compress(stackshot_ctx.sc_finalized_kcdata, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
1739 }
1740
1741 /* Copy over all of the pre task-iteration kcdata (to preserve order as if it were single-threaded) */
1742 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_pretask_kcdata));
1743
1744 /* Set each queue's cur_item to 0. */
1745 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1746 os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_cur_item, 0, relaxed);
1747 }
1748
1749 /*
1750 * Iterate over work queue(s) and copy the kcdata in.
1751 */
1752 while (true) {
1753 struct stackshot_workitem *next_item = NULL;
1754 struct stackshot_workqueue *next_queue = NULL;
1755 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1756 struct stackshot_workqueue *queue = &stackshot_ctx.sc_workqueues[i];
1757 size_t cur_item = os_atomic_load(&queue->sswq_cur_item, relaxed);
1758
1759 /* Check if we're done with this queue */
1760 if (cur_item >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1761 continue;
1762 }
1763
1764 /* Check if this workitem should come next */
1765 struct stackshot_workitem *item = &queue->sswq_items[cur_item];
1766 if ((next_item == NULL) || (next_item->sswi_idx > item->sswi_idx)) {
1767 next_item = item;
1768 next_queue = queue;
1769 }
1770 }
1771
1772 /* Queues are empty. */
1773 if (next_item == NULL) {
1774 break;
1775 }
1776
1777 assert(next_queue);
1778 assert(next_item->sswi_data != NULL);
1779
1780 os_atomic_inc(&next_queue->sswq_cur_item, relaxed);
1781 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, next_item->sswi_data));
1782 }
1783
1784 /* Write post-task kcdata */
1785 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_posttask_kcdata));
1786 error_exit:
1787 if (error != KERN_SUCCESS) {
1788 stackshot_free_final_kcdata();
1789 }
1790 return error;
1791 }
1792
1793
1794 /**
1795 * Called at the very end of stackshot data generation, to write final timing
1796 * data to the kcdata structure and close compression. Only called for
1797 * multi-threaded stackshots; see stackshot_finalize_singlethreaded_kcata for
1798 * single-threaded variant.
1799 *
1800 * Called with interrupts enabled, stackshot subsys lock held.
1801 */
1802 __result_use_check
1803 static kern_return_t
stackshot_finalize_kcdata(void)1804 stackshot_finalize_kcdata(void)
1805 {
1806 kern_return_t error = 0;
1807
1808 assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1809 LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1810
1811 assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1812
1813 /* Write stackshot timing info */
1814 kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1815
1816 /* Note: exactly 0 or 1 call to something pushing more data can be called after kcd_finalize_compression */
1817 kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1818 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1819 kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1820
1821 stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1822 stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1823
1824 if (os_atomic_load(&stackshot_ctx.sc_retval, relaxed) == KERN_SUCCESS) {
1825 /* releases and zeros done */
1826 kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1827 }
1828
1829 memcpy(stackshot_args.buffer, (void*) stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1830
1831 /* Fix duration_outer offset */
1832 if (stackshot_duration_outer != NULL) {
1833 stackshot_duration_outer = (unaligned_u64*) ((mach_vm_address_t) stackshot_args.buffer + ((mach_vm_address_t) stackshot_duration_outer - stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin));
1834 }
1835
1836 error_exit:
1837 stackshot_free_final_kcdata();
1838 return error;
1839 }
1840
1841 /**
1842 * Finalizes the kcdata for a singlethreaded stackshot.
1843 *
1844 * May be called from interrupt/panic context.
1845 */
1846 __result_use_check
1847 static kern_return_t
stackshot_finalize_singlethreaded_kcdata(void)1848 stackshot_finalize_singlethreaded_kcdata(void)
1849 {
1850 kern_return_t error;
1851
1852 assert(stackshot_ctx.sc_is_singlethreaded);
1853
1854 kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1855 /* Note: exactly 0 or 1 call to something pushing more data can be called after kcd_finalize_compression */
1856 kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1857 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1858 kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1859
1860 stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1861 stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1862
1863 kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1864
1865 if (stackshot_ctx.sc_panic_stackshot) {
1866 *stackshot_args.descriptor = *stackshot_ctx.sc_finalized_kcdata;
1867 }
1868
1869 error_exit:
1870 return error;
1871 }
1872
1873 /*
1874 * stackshot_remap_buffer: Utility function to remap bytes_traced bytes starting at stackshotbuf
1875 * into the current task's user space and subsequently copy out the address
1876 * at which the buffer has been mapped in user space to out_buffer_addr.
1877 *
1878 * Inputs: stackshotbuf - pointer to the original buffer in the kernel's address space
1879 * bytes_traced - length of the buffer to remap starting from stackshotbuf
1880 * out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
1881 * out_size_addr - pointer to be filled in with the size of the buffer
1882 *
1883 * Outputs: ENOSPC if there is not enough free space in the task's address space to remap the buffer
1884 * EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
1885 * an error from copyout
1886 */
1887 static kern_return_t
stackshot_remap_buffer(void * stackshotbuf,uint32_t bytes_traced,uint64_t out_buffer_addr,uint64_t out_size_addr)1888 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
1889 {
1890 int error = 0;
1891 mach_vm_offset_t stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
1892 vm_prot_t cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
1893
1894 error = mach_vm_remap(current_map(), &stackshotbuf_user_addr, bytes_traced, 0,
1895 VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE,
1896 &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
1897 /*
1898 * If the call to mach_vm_remap fails, we return the appropriate converted error
1899 */
1900 if (error == KERN_SUCCESS) {
1901 /* If the user addr somehow didn't get set, we should make sure that we fail, and (eventually)
1902 * panic on development kernels to find out why
1903 */
1904 if (stackshotbuf_user_addr == (mach_vm_offset_t)NULL) {
1905 #if DEVELOPMENT || DEBUG
1906 os_log_error(OS_LOG_DEFAULT, "stackshot: mach_vm_remap succeeded with NULL\n");
1907 #endif // DEVELOPMENT || DEBUG
1908 return KERN_FAILURE;
1909 }
1910
1911 /*
1912 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
1913 * we just made in the task's user space.
1914 */
1915 error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
1916 if (error != KERN_SUCCESS) {
1917 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1918 return error;
1919 }
1920 error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
1921 if (error != KERN_SUCCESS) {
1922 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1923 return error;
1924 }
1925 }
1926 return error;
1927 }
1928
1929 #if CONFIG_EXCLAVES
1930
1931 static kern_return_t
stackshot_setup_exclave_waitlist(void)1932 stackshot_setup_exclave_waitlist(void)
1933 {
1934 kern_return_t error = KERN_SUCCESS;
1935 size_t exclave_threads_max = exclaves_ipc_buffer_count();
1936 size_t waitlist_size = 0;
1937
1938 assert(!stackshot_exclave_inspect_ctids);
1939
1940 if (exclaves_inspection_is_initialized() && exclave_threads_max) {
1941 if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
1942 error = KERN_INVALID_ARGUMENT;
1943 goto error;
1944 }
1945 stackshot_exclave_inspect_ctids = stackshot_alloc_with_size(waitlist_size, &error);
1946 if (!stackshot_exclave_inspect_ctids) {
1947 goto error;
1948 }
1949 stackshot_exclave_inspect_ctid_count = 0;
1950 stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
1951 }
1952
1953 error:
1954 return error;
1955 }
1956
1957 static kern_return_t
collect_exclave_threads(uint64_t ss_flags)1958 collect_exclave_threads(uint64_t ss_flags)
1959 {
1960 size_t i;
1961 ctid_t ctid;
1962 thread_t thread;
1963 kern_return_t kr = KERN_SUCCESS;
1964 STACKSHOT_SUBSYS_ASSERT_LOCKED();
1965
1966 lck_mtx_lock(&exclaves_collect_mtx);
1967
1968 if (stackshot_exclave_inspect_ctid_count == 0) {
1969 /* Nothing to do */
1970 goto out;
1971 }
1972
1973 // When asking for ASIDs, make sure we get all exclaves asids and mappings as well
1974 exclaves_stackshot_raw_addresses = (ss_flags & STACKSHOT_ASID);
1975 exclaves_stackshot_all_address_spaces = (ss_flags & (STACKSHOT_ASID | STACKSHOT_EXCLAVES));
1976
1977 /* This error is intentionally ignored: we are now committed to collecting
1978 * these threads, or at least properly waking them. If this fails, the first
1979 * collected thread should also fail to append to the kcdata, and will abort
1980 * further collection, properly clearing the AST and waking these threads.
1981 */
1982 kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_BEGIN,
1983 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1984
1985 for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
1986 ctid = stackshot_exclave_inspect_ctids[i];
1987 thread = ctid_get_thread(ctid);
1988 assert(thread);
1989 exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
1990 }
1991 exclaves_inspection_begin_collecting();
1992 exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
1993 kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
1994 if (kr != KERN_SUCCESS) {
1995 goto out;
1996 }
1997
1998 kr = kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_END,
1999 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
2000 if (kr != KERN_SUCCESS) {
2001 goto out;
2002 }
2003 out:
2004 /* clear Exclave buffer now that it's been used */
2005 stackshot_exclave_inspect_ctids = NULL;
2006 stackshot_exclave_inspect_ctid_capacity = 0;
2007 stackshot_exclave_inspect_ctid_count = 0;
2008
2009 lck_mtx_unlock(&exclaves_collect_mtx);
2010 return kr;
2011 }
2012
2013 static kern_return_t
stackshot_exclaves_process_stacktrace(const address_v__opt_s * _Nonnull st,void * kcdata_ptr)2014 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
2015 {
2016 kern_return_t error = KERN_SUCCESS;
2017 exclave_ecstackentry_addr_t * addr = NULL;
2018 __block size_t count = 0;
2019
2020 if (!st->has_value) {
2021 goto error_exit;
2022 }
2023
2024 address__v_visit(&st->value, ^(size_t __unused i, const stackshottypes_address_s __unused item) {
2025 count++;
2026 });
2027
2028 kcdata_compression_window_open(kcdata_ptr);
2029 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
2030 sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
2031
2032 address__v_visit(&st->value, ^(size_t i, const stackshottypes_address_s item) {
2033 addr[i] = (exclave_ecstackentry_addr_t)item;
2034 });
2035
2036 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2037
2038 error_exit:
2039 return error;
2040 }
2041
2042 static kern_return_t
stackshot_exclaves_process_ipcstackentry(uint64_t index,const stackshottypes_ipcstackentry_s * _Nonnull ise,void * kcdata_ptr)2043 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshottypes_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
2044 {
2045 kern_return_t error = KERN_SUCCESS;
2046
2047 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2048 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2049
2050 struct exclave_ipcstackentry_info info = { 0 };
2051 info.eise_asid = ise->asid;
2052
2053 info.eise_tnid = ise->tnid;
2054
2055 if (ise->invocationid.has_value) {
2056 info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
2057 info.eise_invocationid = ise->invocationid.value;
2058 } else {
2059 info.eise_invocationid = 0;
2060 }
2061
2062 info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
2063
2064 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
2065
2066 if (ise->stacktrace.has_value) {
2067 kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
2068 }
2069
2070 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2071 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2072
2073 error_exit:
2074 return error;
2075 }
2076
2077 static kern_return_t
stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s * _Nonnull ipcstack,void * kcdata_ptr)2078 stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
2079 {
2080 __block kern_return_t kr = KERN_SUCCESS;
2081
2082 if (!ipcstack->has_value) {
2083 goto error_exit;
2084 }
2085
2086 stackshottypes_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshottypes_ipcstackentry_s *_Nonnull item) {
2087 if (kr == KERN_SUCCESS) {
2088 kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
2089 }
2090 });
2091
2092 error_exit:
2093 return kr;
2094 }
2095
2096 static kern_return_t
stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s * _Nonnull se,void * kcdata_ptr)2097 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
2098 {
2099 kern_return_t error = KERN_SUCCESS;
2100
2101 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2102 STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2103
2104 struct exclave_scresult_info info = { 0 };
2105 info.esc_id = se->scid;
2106 info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
2107
2108 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
2109
2110 if (se->ipcstack.has_value) {
2111 kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
2112 }
2113
2114 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2115 STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2116
2117 error_exit:
2118 return error;
2119 }
2120
2121 static kern_return_t
stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)2122 stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2123 {
2124 kern_return_t error = KERN_SUCCESS;
2125 __block struct exclave_textlayout_segment_v2 * info = NULL;
2126
2127 __block size_t count = 0;
2128 stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s __unused *_Nonnull item) {
2129 count++;
2130 });
2131
2132 if (!count) {
2133 goto error_exit;
2134 }
2135
2136 kcdata_compression_window_open(kcdata_ptr);
2137 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
2138 sizeof(struct exclave_textlayout_segment_v2), count, (mach_vm_address_t*)&info));
2139
2140 stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s *_Nonnull item) {
2141 memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
2142 info->layoutSegment_loadAddress = item->loadaddress;
2143 if (want_raw_addresses) {
2144 info->layoutSegment_rawLoadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
2145 } else {
2146 info->layoutSegment_rawLoadAddress = 0;
2147 }
2148 info++;
2149 });
2150
2151 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2152
2153 error_exit:
2154 return error;
2155 }
2156
2157 static kern_return_t
stackshot_exclaves_process_textlayout(const stackshottypes_textlayout_s * _Nonnull tl,void * kcdata_ptr,bool want_raw_addresses)2158 stackshot_exclaves_process_textlayout(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2159 {
2160 kern_return_t error = KERN_SUCCESS;
2161 __block struct exclave_textlayout_info info = { 0 };
2162
2163 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2164 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2165
2166 // tightbeam optional interfaced don't have enough const.
2167 u32__opt_s sharedcacheindex_opt = tl->sharedcacheindex;
2168 const uint32_t *sharedcache_index = u32__opt_get(&sharedcacheindex_opt);
2169
2170 info.layout_id = tl->textlayoutid;
2171
2172 info.etl_flags =
2173 (want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid) |
2174 (sharedcache_index == NULL ? 0 : kExclaveTextLayoutHasSharedCache);
2175 info.sharedcache_index = (sharedcache_index == NULL) ? UINT32_MAX : *sharedcache_index;
2176
2177 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
2178 kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
2179 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2180 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2181 error_exit:
2182 return error;
2183 }
2184
2185 static kern_return_t
stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s * _Nonnull as,void * kcdata_ptr,bool want_raw_addresses)2186 stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
2187 {
2188 kern_return_t error = KERN_SUCCESS;
2189 struct exclave_addressspace_info info = { 0 };
2190 __block size_t name_len = 0;
2191 uint8_t * name = NULL;
2192
2193 u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
2194 name_len++;
2195 });
2196
2197 info.eas_id = as->asid;
2198
2199 if (want_raw_addresses && as->rawaddressslide.has_value) {
2200 info.eas_flags = kExclaveAddressSpaceHaveSlide;
2201 info.eas_slide = as->rawaddressslide.value;
2202 } else {
2203 info.eas_flags = 0;
2204 info.eas_slide = UINT64_MAX;
2205 }
2206
2207 info.eas_layoutid = as->textlayoutid; // text layout for this address space
2208 info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
2209
2210 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2211 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2212 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
2213
2214 if (name_len > 0) {
2215 kcdata_compression_window_open(kcdata_ptr);
2216 kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
2217
2218 u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
2219 name[i] = item;
2220 });
2221 name[name_len] = 0;
2222
2223 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2224 }
2225
2226 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2227 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2228 error_exit:
2229 return error;
2230 }
2231
2232 kern_return_t
2233 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
2234
2235 kern_return_t
stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s * result,void * kcdata_ptr,bool want_raw_addresses)2236 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
2237 {
2238 __block kern_return_t kr = KERN_SUCCESS;
2239
2240 stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
2241 if (kr == KERN_SUCCESS) {
2242 kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
2243 }
2244 });
2245
2246 stackshottypes_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshottypes_addressspace_s *_Nonnull item) {
2247 if (kr == KERN_SUCCESS) {
2248 kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
2249 }
2250 });
2251
2252 stackshottypes_textlayout__v_visit(&result->textlayouts, ^(size_t __unused i, const stackshottypes_textlayout_s *_Nonnull item) {
2253 if (kr == KERN_SUCCESS) {
2254 kr = stackshot_exclaves_process_textlayout(item, kcdata_ptr, want_raw_addresses);
2255 }
2256 });
2257
2258 return kr;
2259 }
2260
2261 kern_return_t
2262 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
2263
2264 kern_return_t
stackshot_exclaves_process_result(kern_return_t collect_kr,const stackshot_stackshotresult_s * result,bool want_raw_addresses)2265 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
2266 {
2267 kern_return_t kr = KERN_SUCCESS;
2268 if (result == NULL) {
2269 return collect_kr;
2270 }
2271
2272 kr = stackshot_exclaves_process_stackshot(result, stackshot_ctx.sc_finalized_kcdata, want_raw_addresses);
2273
2274 stackshot_exclave_kr = kr;
2275
2276 return kr;
2277 }
2278
2279
2280 static void
commit_exclaves_ast(void)2281 commit_exclaves_ast(void)
2282 {
2283 size_t i = 0;
2284 thread_t thread = NULL;
2285 size_t count;
2286
2287 assert(debug_mode_active());
2288
2289 count = os_atomic_load(&stackshot_exclave_inspect_ctid_count, acquire);
2290
2291 if (stackshot_exclave_inspect_ctids) {
2292 for (i = 0; i < count; ++i) {
2293 thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
2294 assert(thread);
2295 thread_reference(thread);
2296 os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
2297 }
2298 }
2299 }
2300
2301 #endif /* CONFIG_EXCLAVES */
2302
2303 kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version,void * stackshot_config,size_t stackshot_config_size,boolean_t stackshot_from_user)2304 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
2305 {
2306 int error = 0;
2307 boolean_t prev_interrupt_state;
2308 bool did_copyout = false;
2309 uint32_t bytes_traced = 0;
2310 uint32_t stackshot_estimate = 0;
2311 struct kdp_snapshot_args snapshot_args;
2312
2313 void * buf_to_free = NULL;
2314 int size_to_free = 0;
2315 bool is_traced = false; /* has FUNC_START tracepoint fired? */
2316 uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
2317
2318 /* Parsed arguments */
2319 uint64_t out_buffer_addr;
2320 uint64_t out_size_addr;
2321 uint32_t size_hint = 0;
2322
2323 snapshot_args.pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
2324
2325 if (stackshot_config == NULL) {
2326 return KERN_INVALID_ARGUMENT;
2327 }
2328 #if DEVELOPMENT || DEBUG
2329 /* TBD: ask stackshot clients to avoid issuing stackshots in this
2330 * configuration in lieu of the kernel feature override.
2331 */
2332 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
2333 return KERN_NOT_SUPPORTED;
2334 }
2335 #endif
2336
2337 switch (stackshot_config_version) {
2338 case STACKSHOT_CONFIG_TYPE:
2339 if (stackshot_config_size != sizeof(stackshot_config_t)) {
2340 return KERN_INVALID_ARGUMENT;
2341 }
2342 stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
2343 out_buffer_addr = config->sc_out_buffer_addr;
2344 out_size_addr = config->sc_out_size_addr;
2345 snapshot_args.pid = config->sc_pid;
2346 snapshot_args.flags = config->sc_flags;
2347 snapshot_args.since_timestamp = config->sc_delta_timestamp;
2348 if (config->sc_size <= max_tracebuf_size) {
2349 size_hint = config->sc_size;
2350 }
2351 /*
2352 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
2353 * dump every level if the pagetable_mask is not set
2354 */
2355 if (snapshot_args.flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
2356 snapshot_args.pagetable_mask = config->sc_pagetable_mask;
2357 }
2358 break;
2359 default:
2360 return KERN_NOT_SUPPORTED;
2361 }
2362
2363 /*
2364 * Currently saving a kernel buffer and trylock are only supported from the
2365 * internal/KEXT API.
2366 */
2367 if (stackshot_from_user) {
2368 if (snapshot_args.flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
2369 return KERN_NO_ACCESS;
2370 }
2371 #if !DEVELOPMENT && !DEBUG
2372 if (snapshot_args.flags & (STACKSHOT_DO_COMPRESS)) {
2373 return KERN_NO_ACCESS;
2374 }
2375 #endif
2376 } else {
2377 if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2378 return KERN_NOT_SUPPORTED;
2379 }
2380 }
2381
2382 if (!((snapshot_args.flags & STACKSHOT_KCDATA_FORMAT) || (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
2383 return KERN_NOT_SUPPORTED;
2384 }
2385
2386 /* Compresssed delta stackshots or page dumps are not yet supported */
2387 if (((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (snapshot_args.flags & STACKSHOT_PAGE_TABLES))
2388 && (snapshot_args.flags & STACKSHOT_DO_COMPRESS)) {
2389 return KERN_NOT_SUPPORTED;
2390 }
2391
2392 /*
2393 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
2394 */
2395 if ((!out_buffer_addr || !out_size_addr) && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2396 return KERN_INVALID_ARGUMENT;
2397 }
2398
2399 if (snapshot_args.since_timestamp != 0 && ((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
2400 return KERN_INVALID_ARGUMENT;
2401 }
2402
2403 /* EXCLAVES and SKIP_EXCLAVES conflict */
2404 if ((snapshot_args.flags & (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) == (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) {
2405 return KERN_INVALID_ARGUMENT;
2406 }
2407
2408 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
2409 if (!mt_core_supported) {
2410 snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2411 }
2412 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
2413 snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2414 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
2415
2416 STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
2417 STACKSHOT_SUBSYS_LOCK();
2418
2419 stackshot_tries = 0;
2420
2421 if (snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
2422 /*
2423 * Don't overwrite an existing stackshot
2424 */
2425 if (kernel_stackshot_buf != NULL) {
2426 error = KERN_MEMORY_PRESENT;
2427 goto error_early_exit;
2428 }
2429 } else if (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
2430 if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
2431 error = KERN_NOT_IN_SET;
2432 goto error_early_exit;
2433 }
2434 error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
2435 out_buffer_addr, out_size_addr);
2436 /*
2437 * If we successfully remapped the buffer into the user's address space, we
2438 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
2439 * and then clear the kernel stackshot pointer and associated size.
2440 */
2441 if (error == KERN_SUCCESS) {
2442 did_copyout = true;
2443 buf_to_free = kernel_stackshot_buf;
2444 size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
2445 kernel_stackshot_buf = NULL;
2446 kernel_stackshot_buf_size = 0;
2447 }
2448
2449 goto error_early_exit;
2450 }
2451
2452 if (snapshot_args.flags & STACKSHOT_GET_BOOT_PROFILE) {
2453 void *bootprofile = NULL;
2454 uint32_t len = 0;
2455 #if CONFIG_TELEMETRY
2456 bootprofile_get(&bootprofile, &len);
2457 #endif
2458 if (!bootprofile || !len) {
2459 error = KERN_NOT_IN_SET;
2460 goto error_early_exit;
2461 }
2462 error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
2463 if (error == KERN_SUCCESS) {
2464 did_copyout = true;
2465 }
2466 goto error_early_exit;
2467 }
2468
2469 stackshot_duration_prior_abs = 0;
2470 stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
2471 snapshot_args.buffer_size = stackshot_estimate =
2472 get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj, snapshot_args.flags, snapshot_args.pid);
2473 stackshot_initial_estimate = stackshot_estimate;
2474
2475 // ensure at least one attempt, even if the initial size from estimate was too big
2476 snapshot_args.buffer_size = MIN(snapshot_args.buffer_size, max_tracebuf_size);
2477
2478 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
2479 snapshot_args.flags, snapshot_args.buffer_size, snapshot_args.pid, snapshot_args.since_timestamp);
2480 is_traced = true;
2481
2482 #if CONFIG_EXCLAVES
2483 assert(!stackshot_exclave_inspect_ctids);
2484 #endif
2485
2486 for (; snapshot_args.buffer_size <= max_tracebuf_size; snapshot_args.buffer_size = MIN(snapshot_args.buffer_size << 1, max_tracebuf_size)) {
2487 stackshot_tries++;
2488 if ((error = kmem_alloc(kernel_map, (vm_offset_t *)&snapshot_args.buffer, snapshot_args.buffer_size,
2489 KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
2490 os_log_error(OS_LOG_DEFAULT, "stackshot: initial allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, snapshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
2491 error = KERN_RESOURCE_SHORTAGE;
2492 goto error_exit;
2493 }
2494
2495 uint32_t hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2496 : (snapshot_args.flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2497 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2498 #pragma unused(hdr_tag)
2499
2500 stackshot_duration_outer = NULL;
2501
2502 /* if compression was requested, allocate the extra zlib scratch area */
2503 if (snapshot_args.flags & STACKSHOT_DO_COMPRESS) {
2504 hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2505 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2506 if (error != KERN_SUCCESS) {
2507 os_log_error(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
2508 (int) error);
2509 goto error_exit;
2510 }
2511 }
2512
2513 /* Prepare the compressor for a stackshot */
2514 error = vm_compressor_kdp_init();
2515 if (error != KERN_SUCCESS) {
2516 goto error_exit;
2517 }
2518
2519 /*
2520 * Disable interrupts and save the current interrupt state.
2521 */
2522 prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
2523 uint64_t time_start = mach_absolute_time();
2524
2525 /* Emit a SOCD tracepoint that we are initiating a stackshot */
2526 SOCD_TRACE_XNU_START(STACKSHOT);
2527
2528 /*
2529 * Load stackshot parameters.
2530 */
2531 error = kdp_snapshot_preflight_internal(snapshot_args);
2532
2533 if (error == KERN_SUCCESS) {
2534 error = stackshot_trap();
2535 }
2536
2537 /* Emit a SOCD tracepoint that we have completed the stackshot */
2538 SOCD_TRACE_XNU_END(STACKSHOT);
2539 ml_set_interrupts_enabled(prev_interrupt_state);
2540
2541 #if CONFIG_EXCLAVES
2542 /* stackshot trap should only finish successfully or with no pending Exclave threads */
2543 assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
2544 #endif
2545
2546 /*
2547 * Stackshot is no longer active.
2548 * (We have to do this here for the special interrupt disable timeout case to work)
2549 */
2550 os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
2551
2552 /* Release compressor kdp buffers */
2553 vm_compressor_kdp_teardown();
2554
2555 /* Record duration that interrupts were disabled */
2556 uint64_t time_end = mach_absolute_time();
2557 tot_interrupts_off_abs += (time_end - time_start);
2558
2559 /* Collect multithreaded kcdata into one finalized buffer */
2560 if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
2561 error = stackshot_collect_kcdata();
2562 }
2563
2564 #if CONFIG_EXCLAVES
2565 if (error == KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
2566 if (stackshot_exclave_inspect_ctid_count > 0) {
2567 STACKSHOT_TESTPOINT(TP_START_COLLECTION);
2568 }
2569 error = collect_exclave_threads(snapshot_args.flags);
2570 }
2571 #endif /* CONFIG_EXCLAVES */
2572
2573 if (error == KERN_SUCCESS) {
2574 if (stackshot_ctx.sc_is_singlethreaded) {
2575 error = stackshot_finalize_singlethreaded_kcdata();
2576 } else {
2577 error = stackshot_finalize_kcdata();
2578 }
2579
2580 if ((error != KERN_SUCCESS) && (error != KERN_INSUFFICIENT_BUFFER_SIZE)) {
2581 goto error_exit;
2582 }
2583 if (error == KERN_INSUFFICIENT_BUFFER_SIZE && snapshot_args.buffer_size == max_tracebuf_size) {
2584 os_log_error(OS_LOG_DEFAULT, "stackshot: final buffer size was insufficient at maximum size\n");
2585 error = KERN_RESOURCE_SHORTAGE;
2586 goto error_exit;
2587 }
2588 }
2589
		/* record the duration that interrupts were disabled + kcdata was being finalized */
2591 if (stackshot_duration_outer) {
2592 *stackshot_duration_outer = mach_absolute_time() - time_start;
2593 }
2594
2595 if (error != KERN_SUCCESS) {
2596 os_log_error(OS_LOG_DEFAULT, "stackshot: debugger call failed: %d, try %llu, buffer %u estimate %u\n", (int)error, stackshot_tries, snapshot_args.buffer_size, stackshot_estimate);
2597 kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2598 snapshot_args.buffer = NULL;
2599 if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
2600 /*
2601 * If we didn't allocate a big enough buffer, deallocate and try again.
2602 */
2603 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
2604 time_end - time_start, stackshot_estimate, snapshot_args.buffer_size);
2605 stackshot_duration_prior_abs += (time_end - time_start);
2606 if (snapshot_args.buffer_size == max_tracebuf_size) {
2607 os_log_error(OS_LOG_DEFAULT, "stackshot: initial buffer size was insufficient at maximum size\n");
2608 error = KERN_RESOURCE_SHORTAGE;
2609 goto error_exit;
2610 }
2611 continue;
2612 } else {
2613 goto error_exit;
2614 }
2615 }
2616
2617 bytes_traced = kdp_stack_snapshot_bytes_traced();
2618 if (bytes_traced <= 0) {
2619 error = KERN_ABORTED;
2620 goto error_exit;
2621 }
2622
2623 if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2624 error = stackshot_remap_buffer(snapshot_args.buffer, bytes_traced, out_buffer_addr, out_size_addr);
2625 if (error == KERN_SUCCESS) {
2626 did_copyout = true;
2627 }
2628 goto error_exit;
2629 }
2630
2631 if (!(snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT)) {
2632 os_log_info(OS_LOG_DEFAULT, "stackshot: succeeded, traced %u bytes to %u buffer (estimate %u) try %llu\n", bytes_traced, snapshot_args.buffer_size, stackshot_estimate, stackshot_tries);
2633 }
2634
2635 /*
2636 * Save the stackshot in the kernel buffer.
2637 */
2638 kernel_stackshot_buf = snapshot_args.buffer;
2639 kernel_stackshot_buf_size = bytes_traced;
2640 /*
2641 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
2642 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
2643 * update size_to_free for kmem_free accordingly.
2644 */
2645 size_to_free = snapshot_args.buffer_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
2646
2647 assert(size_to_free >= 0);
2648
2649 if (size_to_free != 0) {
2650 buf_to_free = (void *)((uint64_t)snapshot_args.buffer + snapshot_args.buffer_size - size_to_free);
2651 }
2652
2653 snapshot_args.buffer = NULL;
2654 snapshot_args.buffer_size = 0;
2655 goto error_exit;
2656 }
2657
2658 error_exit:
2659 if (is_traced) {
2660 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
2661 error, tot_interrupts_off_abs, snapshot_args.buffer_size, bytes_traced);
2662 }
2663
2664 error_early_exit:
2665 if (snapshot_args.buffer != NULL) {
2666 kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2667 }
2668 if (buf_to_free != NULL) {
2669 kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
2670 }
2671
2672 if (error == KERN_SUCCESS && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) && !did_copyout) {
2673 /* If we return success, we must have done the copyout to userspace. If
2674 * we somehow did not, we need to indicate failure instead.
2675 */
2676 #if DEVELOPMENT || DEBUG
2677 os_log_error(OS_LOG_DEFAULT, "stackshot: reached end without doing copyout\n");
2678 #endif // DEVELOPMENT || DEBUG
2679 error = KERN_FAILURE;
2680 }
2681
2682 STACKSHOT_SUBSYS_UNLOCK();
2683 STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
2684
2685 return error;
2686 }
2687
2688 /*
2689 * Set up state and parameters for a stackshot.
2690 * (This runs on the calling CPU before other CPUs enter the debugger trap.)
2691 * Called when interrupts are disabled, but we're not in the debugger trap yet.
2692 */
2693 __result_use_check
2694 static kern_return_t
kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)2695 kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)
2696 {
2697 kern_return_t error = KERN_SUCCESS;
2698 uint64_t microsecs = 0, secs = 0;
2699 bool is_panic = ((args.flags & STACKSHOT_FROM_PANIC) != 0);
2700 bool process_scoped = (args.pid != -1) &&
2701 ((args.flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
2702 bool is_singlethreaded = stackshot_single_thread || (process_scoped || is_panic || ((args.flags & STACKSHOT_PAGE_TABLES) != 0));
2703 clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)µsecs);
2704
2705 cur_stackshot_ctx_idx = (is_panic ? STACKSHOT_CTX_IDX_PANIC : STACKSHOT_CTX_IDX_NORMAL);
2706
2707 /* Setup overall state */
2708 stackshot_ctx = (struct stackshot_context) {
2709 .sc_args = args,
2710 .sc_state = SS_SETUP,
2711 .sc_bytes_traced = 0,
2712 .sc_bytes_uncompressed = 0,
2713 .sc_microsecs = microsecs + (secs * USEC_PER_SEC),
2714 .sc_panic_stackshot = is_panic,
2715 .sc_is_singlethreaded = is_singlethreaded,
2716 .sc_cpus_working = 0,
2717 .sc_retval = 0,
2718 .sc_calling_cpuid = cpu_number(),
2719 .sc_main_cpuid = is_singlethreaded ? cpu_number() : -1,
2720 .sc_min_kcdata_size = get_stackshot_est_tasksize(args.flags),
2721 .sc_enable_faulting = false,
2722 };
2723
2724 if (!stackshot_ctx.sc_panic_stackshot) {
2725 #if defined(__AMP__)
2726 /* On AMP systems, we want to split the buffers up by cluster to avoid cache line effects. */
2727 stackshot_ctx.sc_num_buffers = is_singlethreaded ? 1 : ml_get_cluster_count();
2728 #else /* __AMP__ */
2729 stackshot_ctx.sc_num_buffers = 1;
2730 #endif /* !__AMP__ */
2731 size_t bufsz = args.buffer_size / stackshot_ctx.sc_num_buffers;
2732 for (int buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
2733 stackshot_ctx.sc_buffers[buf_idx] = (struct stackshot_buffer) {
2734 .ssb_ptr = (void*) ((mach_vm_address_t) args.buffer + (bufsz * buf_idx)),
2735 .ssb_size = bufsz,
2736 .ssb_used = 0,
2737 .ssb_freelist = NULL,
2738 .ssb_freelist_lock = 0,
2739 .ssb_overhead = 0
2740 };
2741 }
2742
2743 /* Setup per-cpu state */
2744 percpu_foreach_base(base) {
2745 *PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu) = (struct stackshot_cpu_context) { 0 };
2746 }
2747
2748 if (is_singlethreaded) {
2749 /* If the stackshot is singlethreaded, set up the kcdata - we don't bother with linked-list kcdata in singlethreaded mode. */
2750 uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2751 : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2752 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2753 kcdata_memory_static_init(stackshot_kcdata_p, (mach_vm_address_t) stackshot_args.buffer, hdr_tag,
2754 stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
2755 if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
2756 hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2757 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2758 kcd_exit_on_error(kcdata_init_compress(stackshot_kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
2759 }
2760 stackshot_cpu_ctx.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES);
2761 }
2762 } else {
2763 /*
2764 * If this is a panic stackshot, we need to handle things differently.
2765 * The panic code hands us a kcdata descriptor to work with instead of
2766 * us making one ourselves.
2767 */
2768 *stackshot_kcdata_p = *stackshot_args.descriptor;
2769 stackshot_cpu_ctx = (struct stackshot_cpu_context) {
2770 .scc_can_work = true,
2771 .scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES)
2772 };
2773 #if STACKSHOT_COLLECTS_LATENCY_INFO
2774 *(PERCPU_GET(stackshot_trace_buffer)) = (struct stackshot_trace_buffer) {};
2775 #endif
2776 }
2777
2778 /* Set up our cpu state */
2779 stackshot_cpu_preflight();
2780
2781 error_exit:
2782 return error;
2783 }
2784
2785 /*
2786 * The old function signature for kdp_snapshot_preflight, used in the panic path.
2787 * Called when interrupts are disabled, but we're not in the debugger trap yet.
2788 */
2789 void
kdp_snapshot_preflight(int pid,void * tracebuf,uint32_t tracebuf_size,uint64_t flags,kcdata_descriptor_t data_p,uint64_t since_timestamp,uint32_t pagetable_mask)2790 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
2791 kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
2792 {
2793 __assert_only kern_return_t err;
2794 err = kdp_snapshot_preflight_internal((struct kdp_snapshot_args) {
2795 .pid = pid,
2796 .buffer = tracebuf,
2797 .buffer_size = tracebuf_size,
2798 .flags = flags,
2799 .descriptor = data_p,
2800 .since_timestamp = since_timestamp,
2801 .pagetable_mask = pagetable_mask
2802 });
2803
2804
2805 /* This shouldn't ever return an error in the panic path. */
2806 assert(err == KERN_SUCCESS);
2807 }
2808
2809 static void
stackshot_reset_state(void)2810 stackshot_reset_state(void)
2811 {
2812 stackshot_ctx = (struct stackshot_context) { 0 };
2813 }
2814
/* Externally visible wrapper: resets the stackshot context via stackshot_reset_state(). */
void
panic_stackshot_reset_state(void)
{
	/* No panic-specific work here; simply forward to the shared reset. */
	stackshot_reset_state();
}
2820
2821 boolean_t
stackshot_active(void)2822 stackshot_active(void)
2823 {
2824 return os_atomic_load(&stackshot_ctx.sc_state, relaxed) != SS_INACTIVE;
2825 }
2826
2827 boolean_t
panic_stackshot_active(void)2828 panic_stackshot_active(void)
2829 {
2830 return os_atomic_load(&stackshot_contexts[STACKSHOT_CTX_IDX_PANIC].sc_state, relaxed) != SS_INACTIVE;
2831 }
2832
2833 uint32_t
kdp_stack_snapshot_bytes_traced(void)2834 kdp_stack_snapshot_bytes_traced(void)
2835 {
2836 return stackshot_ctx.sc_bytes_traced;
2837 }
2838
2839 uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)2840 kdp_stack_snapshot_bytes_uncompressed(void)
2841 {
2842 return stackshot_ctx.sc_bytes_uncompressed;
2843 }
2844
/*
 * Returns TRUE when all `size` bytes starting at `addr` are zero (trivially
 * TRUE for size 0), FALSE as soon as a non-zero byte is found.
 */
static boolean_t
memory_iszero(void *addr, size_t size)
{
	const char *cursor = (const char *)addr;

	for (size_t remaining = size; remaining > 0; remaining--, cursor++) {
		if (*cursor != 0) {
			return FALSE;
		}
	}
	return TRUE;
}
2856
2857 static void
_stackshot_validation_reset(void)2858 _stackshot_validation_reset(void)
2859 {
2860 percpu_foreach_base(base) {
2861 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2862 cpu_ctx->scc_validation_state.last_valid_page_kva = -1;
2863 cpu_ctx->scc_validation_state.last_valid_size = 0;
2864 }
2865 }
2866
2867 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)2868 _stackshot_validate_kva(vm_offset_t addr, size_t size)
2869 {
2870 vm_offset_t page_addr = atop_kernel(addr);
2871 if (stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva == page_addr &&
2872 stackshot_cpu_ctx.scc_validation_state.last_valid_size <= size) {
2873 return true;
2874 }
2875
2876 if (ml_validate_nofault(addr, size)) {
2877 stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva = page_addr;
2878 stackshot_cpu_ctx.scc_validation_state.last_valid_size = size;
2879 return true;
2880 }
2881 return false;
2882 }
2883
2884 static long
_stackshot_strlen(const char * s,size_t maxlen)2885 _stackshot_strlen(const char *s, size_t maxlen)
2886 {
2887 size_t len = 0;
2888 for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
2889 if (*s == 0) {
2890 return len;
2891 }
2892 if (len >= maxlen) {
2893 return -1;
2894 }
2895 }
2896 return -1; /* failed before end of string */
2897 }
2898
2899
2900 static size_t
stackshot_plh_est_size(void)2901 stackshot_plh_est_size(void)
2902 {
2903 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2904 size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
2905
2906 if (size == 0) {
2907 return 0;
2908 }
2909 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
2910 return SIZE_EST(size * sizeof(*plh->plh_array)) +
2911 SIZE_EST(size * sizeof(*plh->plh_chains)) +
2912 SIZE_EST(size * sizeof(*stackshot_cpu_ctx.scc_plh_gen.pgs_gen) * real_ncpus) +
2913 SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
2914 #undef SIZE_EST
2915 }
2916
2917 static void
stackshot_plh_reset(void)2918 stackshot_plh_reset(void)
2919 {
2920 stackshot_ctx.sc_plh = (struct port_label_hash){.plh_size = 0}; /* structure assignment */
2921 }
2922
2923 static kern_return_t
stackshot_plh_setup(void)2924 stackshot_plh_setup(void)
2925 {
2926 kern_return_t error;
2927 size_t size;
2928 bool percpu_alloc_failed = false;
2929 struct port_label_hash plh = {
2930 .plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
2931 .plh_count = 0,
2932 };
2933
2934 stackshot_plh_reset();
2935
2936 percpu_foreach_base(base) {
2937 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2938 cpu_ctx->scc_plh_gen = (struct _stackshot_plh_gen_state){
2939 .pgs_gen = NULL,
2940 .pgs_curgen = 1,
2941 .pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX,
2942 .pgs_curgen_max = 0,
2943 };
2944 }
2945
2946 size = plh.plh_size;
2947 if (size == 0) {
2948 return KERN_SUCCESS;
2949 }
2950 plh.plh_array = stackshot_alloc_with_size(size * sizeof(*plh.plh_array), &error);
2951 plh.plh_chains = stackshot_alloc_with_size(size * sizeof(*plh.plh_chains), &error);
2952 percpu_foreach_base(base) {
2953 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2954 cpu_ctx->scc_plh_gen.pgs_gen = stackshot_alloc_with_size(size * sizeof(*cpu_ctx->scc_plh_gen.pgs_gen), &error);
2955 if (cpu_ctx->scc_plh_gen.pgs_gen == NULL) {
2956 percpu_alloc_failed = true;
2957 break;
2958 }
2959 for (int x = 0; x < size; x++) {
2960 cpu_ctx->scc_plh_gen.pgs_gen[x] = 0;
2961 }
2962 }
2963 plh.plh_hash = stackshot_alloc_with_size((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash), &error);
2964 if (error != KERN_SUCCESS) {
2965 return error;
2966 }
2967 if (plh.plh_array == NULL || plh.plh_chains == NULL || percpu_alloc_failed || plh.plh_hash == NULL) {
2968 PLH_STAT_OP(os_atomic_inc(&stackshot_ctx.sc_plh.plh_bad, relaxed));
2969 return KERN_SUCCESS;
2970 }
2971 for (int x = 0; x < size; x++) {
2972 plh.plh_array[x] = NULL;
2973 plh.plh_chains[x] = -1;
2974 }
2975 for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
2976 plh.plh_hash[x] = -1;
2977 }
2978 stackshot_ctx.sc_plh = plh; /* structure assignment */
2979 return KERN_SUCCESS;
2980 }
2981
2982 static int16_t
stackshot_plh_hash(struct ipc_service_port_label * ispl)2983 stackshot_plh_hash(struct ipc_service_port_label *ispl)
2984 {
2985 uintptr_t ptr = VM_KERNEL_STRIP_PTR((uintptr_t)ispl);
2986
2987 static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
2988 #define PLH_HASH_STEP(ptr, x) \
2989 ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
2990 ptr ^= PLH_HASH_STEP(ptr, 16);
2991 ptr ^= PLH_HASH_STEP(ptr, 8);
2992 ptr ^= PLH_HASH_STEP(ptr, 4);
2993 ptr ^= PLH_HASH_STEP(ptr, 2);
2994 ptr ^= PLH_HASH_STEP(ptr, 1);
2995 #undef PLH_HASH_STEP
2996 return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
2997 }
2998
/*
 * Kind of port label lookup being made; stackshot_plh_lookup_locked() uses it
 * only to choose which lookup statistic to bump.
 */
enum stackshot_plh_lookup_type {
	STACKSHOT_PLH_LOOKUP_UNKNOWN = 0, /* no per-type statistic is bumped */
	STACKSHOT_PLH_LOOKUP_SEND    = 1, /* counted in plh_lookup_send */
	STACKSHOT_PLH_LOOKUP_RECEIVE = 2, /* counted in plh_lookup_receive */
};
3004
3005 static void
stackshot_plh_resetgen(void)3006 stackshot_plh_resetgen(void)
3007 {
3008 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3009 uint16_t plh_size = stackshot_ctx.sc_plh.plh_size;
3010
3011 if (pgs->pgs_curgen_min == STACKSHOT_PLH_SIZE_MAX && pgs->pgs_curgen_max == 0) {
3012 return; // no lookups, nothing using the current generation
3013 }
3014 pgs->pgs_curgen++;
3015 pgs->pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX;
3016 pgs->pgs_curgen_max = 0;
3017 if (pgs->pgs_curgen == 0) { // wrapped, zero the array and increment the generation
3018 for (int x = 0; x < plh_size; x++) {
3019 pgs->pgs_gen[x] = 0;
3020 }
3021 pgs->pgs_curgen = 1;
3022 }
3023 }
3024
3025 static int16_t
stackshot_plh_lookup_locked(struct ipc_service_port_label * ispl,enum stackshot_plh_lookup_type type)3026 stackshot_plh_lookup_locked(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3027 {
3028 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3029 int depth;
3030 int16_t cur;
3031 if (ispl == NULL) {
3032 return STACKSHOT_PORTLABELID_NONE;
3033 }
3034 switch (type) {
3035 case STACKSHOT_PLH_LOOKUP_SEND:
3036 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_send, relaxed));
3037 break;
3038 case STACKSHOT_PLH_LOOKUP_RECEIVE:
3039 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_receive, relaxed));
3040 break;
3041 default:
3042 break;
3043 }
3044 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookups, relaxed));
3045 if (plh->plh_size == 0) {
3046 return STACKSHOT_PORTLABELID_MISSING;
3047 }
3048 int16_t hash = stackshot_plh_hash(ispl);
3049 assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
3050 depth = 0;
3051 for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
3052 /* cur must be in-range, and chain depth can never be above our # allocated */
3053 if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
3054 PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3055 PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3056 return STACKSHOT_PORTLABELID_MISSING;
3057 }
3058 assert(cur < plh->plh_count);
3059 if (plh->plh_array[cur] == ispl) {
3060 PLH_STAT_OP(os_atomic_inc(&plh->plh_found, relaxed));
3061 PLH_STAT_OP(os_atomic_add(&plh->plh_found_depth, depth, relaxed));
3062 goto found;
3063 }
3064 depth++;
3065 }
3066 /* not found in hash table, so alloc and insert it */
3067 if (cur != -1) {
3068 PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3069 PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3070 return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
3071 }
3072 PLH_STAT_OP(os_atomic_inc(&plh->plh_insert, relaxed));
3073 PLH_STAT_OP(os_atomic_add(&plh->plh_insert_depth, depth, relaxed));
3074 if (plh->plh_count >= plh->plh_size) {
3075 return STACKSHOT_PORTLABELID_MISSING; /* no space */
3076 }
3077 cur = plh->plh_count;
3078 plh->plh_count++;
3079 plh->plh_array[cur] = ispl;
3080 plh->plh_chains[cur] = plh->plh_hash[hash];
3081 plh->plh_hash[hash] = cur;
3082 found: ;
3083 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3084 pgs->pgs_gen[cur] = pgs->pgs_curgen;
3085 if (pgs->pgs_curgen_min > cur) {
3086 pgs->pgs_curgen_min = cur;
3087 }
3088 if (pgs->pgs_curgen_max < cur) {
3089 pgs->pgs_curgen_max = cur;
3090 }
3091 return cur + 1; /* offset to avoid 0 */
3092 }
3093
3094 static kern_return_t
kdp_stackshot_plh_record_locked(void)3095 kdp_stackshot_plh_record_locked(void)
3096 {
3097 kern_return_t error = KERN_SUCCESS;
3098 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3099 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3100 uint16_t count = plh->plh_count;
3101 uint8_t curgen = pgs->pgs_curgen;
3102 int16_t curgen_min = pgs->pgs_curgen_min;
3103 int16_t curgen_max = pgs->pgs_curgen_max;
3104 if (curgen_min <= curgen_max && curgen_max < count &&
3105 count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
3106 struct ipc_service_port_label **arr = plh->plh_array;
3107 size_t ispl_size, max_namelen;
3108 kdp_ipc_splabel_size(&ispl_size, &max_namelen);
3109 for (int idx = curgen_min; idx <= curgen_max; idx++) {
3110 struct ipc_service_port_label *ispl = arr[idx];
3111 struct portlabel_info spl = {
3112 .portlabel_id = (idx + 1),
3113 };
3114 const char *name = NULL;
3115 long name_sz = 0;
3116 if (pgs->pgs_gen[idx] != curgen) {
3117 continue;
3118 }
3119 if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
3120 kdp_ipc_fill_splabel(ispl, &spl, &name);
3121 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
3122 } else {
3123 if (ispl != NULL && (vm_offset_t)ispl >> 48 == 0x0000) {
3124 ca_event_t event_to_send = os_atomic_xchg(&rdar_126582377_event, NULL, relaxed);
3125 if (event_to_send) {
3126 CA_EVENT_SEND(event_to_send);
3127 }
3128 }
3129 #endif
3130 }
3131
3132 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3133 STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3134 if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) { /* validates the kva */
3135 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
3136 } else {
3137 spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
3138 }
3139 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
3140 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3141 STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3142 }
3143 }
3144
3145 error_exit:
3146 return error;
3147 }
3148
3149 // record any PLH referenced since the last stackshot_plh_resetgen() call
3150 static kern_return_t
kdp_stackshot_plh_record(void)3151 kdp_stackshot_plh_record(void)
3152 {
3153 kern_return_t error;
3154 plh_lock(&stackshot_ctx.sc_plh);
3155 error = kdp_stackshot_plh_record_locked();
3156 plh_unlock(&stackshot_ctx.sc_plh);
3157 return error;
3158 }
3159
3160 static int16_t
stackshot_plh_lookup(struct ipc_service_port_label * ispl,enum stackshot_plh_lookup_type type)3161 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3162 {
3163 int16_t result;
3164 plh_lock(&stackshot_ctx.sc_plh);
3165 result = stackshot_plh_lookup_locked(ispl, type);
3166 plh_unlock(&stackshot_ctx.sc_plh);
3167 return result;
3168 }
3169
#if DEVELOPMENT || DEBUG
/*
 * Add each non-zero port label hash statistic to the stackshot kcdata as a
 * uint32 described as "stackshot_<field name>".
 *
 * Returns KERN_SUCCESS, or the error from the first kcdata call that failed.
 */
static kern_return_t
kdp_stackshot_plh_stats(void)
{
	struct port_label_hash *plh = &stackshot_ctx.sc_plh;
	kern_return_t error = KERN_SUCCESS;

	/* Emits one counter; counters that are still zero are left out. */
#define PLH_EMIT_NONZERO(field) \
	do { \
	        if (os_atomic_load(&plh->field, relaxed) != 0) { \
	                kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, \
	                    os_atomic_load(&plh->field, relaxed), "stackshot_" #field)); \
	        } \
	} while (0)

	PLH_EMIT_NONZERO(plh_size);
	PLH_EMIT_NONZERO(plh_lookups);
	PLH_EMIT_NONZERO(plh_found);
	PLH_EMIT_NONZERO(plh_found_depth);
	PLH_EMIT_NONZERO(plh_insert);
	PLH_EMIT_NONZERO(plh_insert_depth);
	PLH_EMIT_NONZERO(plh_bad);
	PLH_EMIT_NONZERO(plh_bad_depth);
	PLH_EMIT_NONZERO(plh_lookup_send);
	PLH_EMIT_NONZERO(plh_lookup_receive);

#undef PLH_EMIT_NONZERO

error_exit:
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
3196
3197 static uint64_t
kcdata_get_task_ss_flags(task_t task)3198 kcdata_get_task_ss_flags(task_t task)
3199 {
3200 uint64_t ss_flags = 0;
3201 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3202 void *bsd_info = get_bsdtask_info(task);
3203
3204 if (task_64bit_addr) {
3205 ss_flags |= kUser64_p;
3206 }
3207 if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
3208 ss_flags |= kTerminatedSnapshot;
3209 }
3210 if (task->pidsuspended) {
3211 ss_flags |= kPidSuspended;
3212 }
3213 if (task->frozen) {
3214 ss_flags |= kFrozen;
3215 }
3216 if (task->effective_policy.tep_darwinbg == 1) {
3217 ss_flags |= kTaskDarwinBG;
3218 }
3219 if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
3220 ss_flags |= kTaskIsForeground;
3221 }
3222 if (task->requested_policy.trp_boosted == 1) {
3223 ss_flags |= kTaskIsBoosted;
3224 }
3225 if (task->effective_policy.tep_sup_active == 1) {
3226 ss_flags |= kTaskIsSuppressed;
3227 }
3228 #if CONFIG_MEMORYSTATUS
3229
3230 boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
3231 memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
3232 if (dirty) {
3233 ss_flags |= kTaskIsDirty;
3234 }
3235 if (dirty_tracked) {
3236 ss_flags |= kTaskIsDirtyTracked;
3237 }
3238 if (allow_idle_exit) {
3239 ss_flags |= kTaskAllowIdleExit;
3240 }
3241
3242 #endif
3243 if (task->effective_policy.tep_tal_engaged) {
3244 ss_flags |= kTaskTALEngaged;
3245 }
3246
3247 ss_flags |= workqueue_get_task_ss_flags_from_pwq_state_kdp(bsd_info);
3248
3249 #if IMPORTANCE_INHERITANCE
3250 if (task->task_imp_base) {
3251 if (task->task_imp_base->iit_donor) {
3252 ss_flags |= kTaskIsImpDonor;
3253 }
3254 if (task->task_imp_base->iit_live_donor) {
3255 ss_flags |= kTaskIsLiveImpDonor;
3256 }
3257 }
3258 #endif
3259 return ss_flags;
3260 }
3261
3262 static kern_return_t
kcdata_record_shared_cache_info(kcdata_descriptor_t kcd,task_t task,unaligned_u64 * task_snap_ss_flags)3263 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
3264 {
3265 kern_return_t error = KERN_SUCCESS;
3266
3267 uint64_t shared_cache_slide = 0;
3268 uint64_t shared_cache_first_mapping = 0;
3269 uint32_t kdp_fault_results = 0;
3270 uint32_t shared_cache_id = 0;
3271 struct dyld_shared_cache_loadinfo shared_cache_data = {0};
3272
3273
3274 assert(task_snap_ss_flags != NULL);
3275
3276 /* Get basic info about the shared region pointer, regardless of any failures */
3277 if (task->shared_region == NULL) {
3278 *task_snap_ss_flags |= kTaskSharedRegionNone;
3279 } else if (task->shared_region == primary_system_shared_region) {
3280 *task_snap_ss_flags |= kTaskSharedRegionSystem;
3281 } else {
3282 *task_snap_ss_flags |= kTaskSharedRegionOther;
3283 }
3284
3285 if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
3286 struct vm_shared_region *sr = task->shared_region;
3287 shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
3288
3289 shared_cache_id = sr->sr_id;
3290 } else {
3291 *task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3292 goto error_exit;
3293 }
3294
3295 /* We haven't copied in the shared region UUID yet as part of setup */
3296 if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
3297 goto error_exit;
3298 }
3299
3300
3301 /*
3302 * No refcounting here, but we are in debugger context, so that should be safe.
3303 */
3304 shared_cache_slide = task->shared_region->sr_slide;
3305
3306 if (task->shared_region == primary_system_shared_region) {
3307 /* skip adding shared cache info -- it's the same as the system level one */
3308 goto error_exit;
3309 }
3310 /*
3311 * New-style shared cache reference: for non-primary shared regions,
3312 * just include the ID of the shared cache we're attached to. Consumers
3313 * should use the following info from the task's ts_ss_flags as well:
3314 *
3315 * kTaskSharedRegionNone - task is not attached to a shared region
3316 * kTaskSharedRegionSystem - task is attached to the shared region
3317 * with kSharedCacheSystemPrimary set in sharedCacheFlags.
3318 * kTaskSharedRegionOther - task is attached to the shared region with
3319 * sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
3320 */
3321 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
3322
3323 /*
3324 * For backwards compatibility; this should eventually be removed.
3325 *
3326 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3327 * naming of both the structure and fields for this use wasn't great. The
3328 * dyld_shared_cache_loadinfo structure has better names, but the same
3329 * layout and content as the original.
3330 *
3331 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3332 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3333 * entries; here, it's the slid first mapping, and we leave it that way
3334 * for backwards compatibility.
3335 */
3336 shared_cache_data.sharedCacheSlide = shared_cache_slide;
3337 kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
3338 shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
3339 shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
3340 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
3341
3342 error_exit:
3343 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3344 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
3345 }
3346
3347 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3348 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3349 }
3350
3351 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3352 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3353 }
3354
3355 return error;
3356 }
3357
3358 static kern_return_t
kcdata_record_uuid_info(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 * task_snap_ss_flags)3359 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
3360 {
3361 bool save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
3362 bool save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
3363 bool save_compactinfo_p = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
3364 bool should_fault = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
3365
3366 kern_return_t error = KERN_SUCCESS;
3367 mach_vm_address_t out_addr = 0;
3368
3369 mach_vm_address_t dyld_compactinfo_addr = 0;
3370 uint32_t dyld_compactinfo_size = 0;
3371
3372 uint32_t uuid_info_count = 0;
3373 mach_vm_address_t uuid_info_addr = 0;
3374 uint64_t uuid_info_timestamp = 0;
3375 #pragma unused(uuid_info_timestamp)
3376 kdp_fault_result_flags_t kdp_fault_results = 0;
3377
3378
3379 assert(task_snap_ss_flags != NULL);
3380
3381 int task_pid = pid_from_task(task);
3382 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3383
3384 if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
3385 /* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
3386 if (task_64bit_addr) {
3387 struct user64_dyld_all_image_infos task_image_infos;
3388 if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3389 sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3390 uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
3391 uuid_info_addr = task_image_infos.uuidArray;
3392 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3393 uuid_info_timestamp = task_image_infos.timestamp;
3394 }
3395 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3396 dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3397 dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3398 }
3399
3400 }
3401 } else {
3402 struct user32_dyld_all_image_infos task_image_infos;
3403 if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3404 sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3405 uuid_info_count = task_image_infos.uuidArrayCount;
3406 uuid_info_addr = task_image_infos.uuidArray;
3407 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3408 uuid_info_timestamp = task_image_infos.timestamp;
3409 }
3410 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3411 dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3412 dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3413 }
3414 }
3415 }
3416
3417 /*
3418 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
3419 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
3420 * for this task.
3421 */
3422 if (!uuid_info_addr) {
3423 uuid_info_count = 0;
3424 }
3425
3426 if (!dyld_compactinfo_addr) {
3427 dyld_compactinfo_size = 0;
3428 }
3429
3430 }
3431
3432 if (have_pmap && task_pid == 0) {
3433 if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
3434 uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
3435 } else {
3436 uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
3437 }
3438 }
3439
3440 if (save_compactinfo_p && task_pid > 0) {
3441 if (dyld_compactinfo_size == 0) {
3442 *task_snap_ss_flags |= kTaskDyldCompactInfoNone;
3443 } else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
3444 *task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
3445 } else {
3446 kdp_fault_result_flags_t ci_kdp_fault_results = 0;
3447
3448 /* Open a compression window to avoid overflowing the stack */
3449 kcdata_compression_window_open(kcd);
3450 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
3451 dyld_compactinfo_size, &out_addr));
3452
3453 if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
3454 dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
3455 bzero((void *)out_addr, dyld_compactinfo_size);
3456 }
3457 if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3458 *task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
3459 }
3460
3461 if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3462 *task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
3463 }
3464
3465 if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3466 *task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
3467 }
3468
3469 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3470 }
3471 }
3472 if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
3473 uint32_t copied_uuid_count = 0;
3474 uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
3475 uint32_t uuid_info_array_size = 0;
3476
3477 /* Open a compression window to avoid overflowing the stack */
3478 kcdata_compression_window_open(kcd);
3479
3480 /* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
3481 if (uuid_info_count > 0) {
3482 uuid_info_array_size = uuid_info_count * uuid_info_size;
3483
3484 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3485 uuid_info_size, uuid_info_count, &out_addr));
3486
3487 if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
3488 bzero((void *)out_addr, uuid_info_array_size);
3489 } else {
3490 copied_uuid_count = uuid_info_count;
3491 }
3492 }
3493
3494 uuid_t binary_uuid;
3495 if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
3496 /* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
3497 if (uuid_info_array_size == 0) {
3498 /* We just need to store one UUID */
3499 uuid_info_array_size = uuid_info_size;
3500 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3501 uuid_info_size, 1, &out_addr));
3502 }
3503
3504 if (task_64bit_addr) {
3505 struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
3506 uint64_t image_load_address = task->mach_header_vm_address;
3507
3508 kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3509 kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3510 } else {
3511 struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
3512 uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
3513
3514 kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3515 kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3516 }
3517 }
3518
3519 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3520 } else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
3521 uintptr_t image_load_address;
3522
3523 do {
3524 #if defined(__arm64__)
3525 if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
3526 struct dyld_uuid_info_64 kc_uuid = {0};
3527 kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3528 kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
3529 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
3530 break;
3531 }
3532 #endif /* defined(__arm64__) */
3533
3534 if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
3535 /* Kernel UUID not found or inaccessible */
3536 break;
3537 }
3538
3539 uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
3540 if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
3541 uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
3542 #if defined(__arm64__)
3543 kc_format_t primary_kc_type = KCFormatUnknown;
3544 if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
3545 /* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
3546 uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
3547 }
3548 #endif
3549 }
3550
3551 /*
3552 * The element count of the array can vary - avoid overflowing the
3553 * stack by opening a window.
3554 */
3555 kcdata_compression_window_open(kcd);
3556 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
3557 sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
3558 kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
3559
3560 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
3561 #if defined(__arm64__)
3562 if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3563 /* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3564 extern vm_offset_t segTEXTEXECB;
3565 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
3566 }
3567 #endif
3568 uuid_info_array[0].imageLoadAddress = image_load_address;
3569 kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
3570
3571 if (save_kextloadinfo_p &&
3572 _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
3573 _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
3574 gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
3575 uint32_t kexti;
3576 for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
3577 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
3578 #if defined(__arm64__)
3579 if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3580 /* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3581 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
3582 }
3583 #endif
3584 uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
3585 kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
3586 }
3587 }
3588 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3589 } while (0);
3590 }
3591
3592 error_exit:
3593 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3594 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
3595 }
3596
3597 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3598 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3599 }
3600
3601 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3602 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3603 }
3604
3605 return error;
3606 }
3607
3608 uint64_t kdp_task_exec_meta_flags(task_t task);
3609
3610 uint64_t
kdp_task_exec_meta_flags(task_t task)3611 kdp_task_exec_meta_flags(task_t task)
3612 {
3613 uint64_t flags = 0;
3614
3615 #if CONFIG_ROSETTA
3616 if (task_is_translated(task)) {
3617 flags |= kTaskExecTranslated;
3618 }
3619 #endif /* CONFIG_ROSETTA */
3620
3621 if (task_has_hardened_heap(task)) {
3622 flags |= kTaskExecHardenedHeap;
3623 }
3624
3625
3626 return flags;
3627 }
3628
3629 /* Compute the set of flags that kdp_task_exec_meta_flags can return based on the kernel config */
3630 static uint64_t
stackshot_available_task_exec_flags(void)3631 stackshot_available_task_exec_flags(void)
3632 {
3633 uint64_t flags_mask = 0;
3634
3635 #if CONFIG_ROSETTA
3636 flags_mask |= kTaskExecTranslated;
3637 #endif /* CONFIG_ROSETTA */
3638
3639 flags_mask |= kTaskExecHardenedHeap;
3640
3641
3642 return flags_mask;
3643 }
3644
3645 static kern_return_t
kcdata_record_task_exec_meta(kcdata_descriptor_t kcd,task_t task)3646 kcdata_record_task_exec_meta(kcdata_descriptor_t kcd, task_t task)
3647 {
3648 struct task_exec_meta tem = {};
3649 kern_return_t error = KERN_SUCCESS;
3650
3651 tem.tem_flags = kdp_task_exec_meta_flags(task);
3652
3653 if (tem.tem_flags != 0) {
3654 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_EXEC_META, sizeof(struct task_exec_meta), &tem));
3655 }
3656
3657 error_exit:
3658 return error;
3659 }
3660
3661 static kern_return_t
kcdata_record_task_iostats(kcdata_descriptor_t kcd,task_t task)3662 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
3663 {
3664 kern_return_t error = KERN_SUCCESS;
3665 mach_vm_address_t out_addr = 0;
3666
3667 /* I/O Statistics if any counters are non zero */
3668 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3669 if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
3670 /* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
3671 kcdata_compression_window_open(kcd);
3672 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3673 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3674 _iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
3675 _iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
3676 _iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
3677 _iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
3678 _iostat->ss_paging_count = task->task_io_stats->paging.count;
3679 _iostat->ss_paging_size = task->task_io_stats->paging.size;
3680 _iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
3681 _iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
3682 _iostat->ss_metadata_count = task->task_io_stats->metadata.count;
3683 _iostat->ss_metadata_size = task->task_io_stats->metadata.size;
3684 _iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
3685 _iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
3686 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
3687 _iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
3688 _iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
3689 }
3690 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3691 }
3692
3693
3694 error_exit:
3695 return error;
3696 }
3697
3698 #if CONFIG_PERVASIVE_CPI
3699 static kern_return_t
kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd,task_t task)3700 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
3701 {
3702 struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
3703 struct recount_usage usage = { 0 };
3704 struct recount_usage perf_only = { 0 };
3705 recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
3706 instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
3707 instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
3708 instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
3709 instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);
3710
3711 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
3712 }
3713 #endif /* CONFIG_PERVASIVE_CPI */
3714
3715 static kern_return_t
kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd,task_t task)3716 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
3717 {
3718 struct stackshot_cpu_architecture cpu_architecture = {0};
3719 int32_t cputype;
3720 int32_t cpusubtype;
3721
3722 proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
3723 cpu_architecture.cputype = cputype;
3724 cpu_architecture.cpusubtype = cpusubtype;
3725
3726 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
3727 }
3728
3729 static kern_return_t
kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd,task_t task)3730 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
3731 {
3732 struct stackshot_task_codesigning_info codesigning_info = {};
3733 void * bsdtask_info = NULL;
3734 uint32_t trust = 0;
3735 kern_return_t ret = 0;
3736 pmap_t pmap = get_task_pmap(task);
3737 uint64_t cs_auxiliary_info = 0;
3738 if (task != kernel_task) {
3739 bsdtask_info = get_bsdtask_info(task);
3740 codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
3741 ret = get_trust_level_kdp(pmap, &trust);
3742 if (ret != KERN_SUCCESS) {
3743 trust = KCDATA_INVALID_CS_TRUST_LEVEL;
3744 }
3745 codesigning_info.cs_trust_level = trust;
3746 cs_auxiliary_info = task_get_cs_auxiliary_info_kdp(task);
3747 } else {
3748 return KERN_SUCCESS;
3749 }
3750 ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
3751 if (ret != KERN_SUCCESS) {
3752 return ret;
3753 }
3754 return kcdata_push_data(kcd, TASK_CRASHINFO_CS_AUXILIARY_INFO, sizeof(cs_auxiliary_info), &cs_auxiliary_info);
3755 }
3756
3757 static kern_return_t
kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd,task_t task)3758 kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd, task_t task)
3759 {
3760 uint64_t jit_start_addr = 0;
3761 uint64_t jit_end_addr = 0;
3762 struct crashinfo_jit_address_range range = {};
3763 kern_return_t ret = 0;
3764 pmap_t pmap = get_task_pmap(task);
3765 if (task == kernel_task || NULL == pmap) {
3766 return KERN_SUCCESS;
3767 }
3768 ret = get_jit_address_range_kdp(pmap, (uintptr_t*)&jit_start_addr, (uintptr_t*)&jit_end_addr);
3769 if (KERN_SUCCESS == ret) {
3770 range.start_address = jit_start_addr;
3771 range.end_address = jit_end_addr;
3772 return kcdata_push_data(kcd, TASK_CRASHINFO_JIT_ADDRESS_RANGE, sizeof(struct crashinfo_jit_address_range), &range);
3773 } else {
3774 return KERN_SUCCESS;
3775 }
3776 }
3777
3778 #if CONFIG_TASK_SUSPEND_STATS
3779 static kern_return_t
kcdata_record_task_suspension_info(kcdata_descriptor_t kcd,task_t task)3780 kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
3781 {
3782 kern_return_t ret = KERN_SUCCESS;
3783 struct stackshot_suspension_info suspension_info = {};
3784 task_suspend_stats_data_t suspend_stats;
3785 task_suspend_source_array_t suspend_sources;
3786 struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
3787 int i;
3788
3789 if (task == kernel_task) {
3790 return KERN_SUCCESS;
3791 }
3792
3793 ret = task_get_suspend_stats_kdp(task, &suspend_stats);
3794 if (ret != KERN_SUCCESS) {
3795 return ret;
3796 }
3797
3798 suspension_info.tss_count = suspend_stats.tss_count;
3799 suspension_info.tss_duration = suspend_stats.tss_duration;
3800 suspension_info.tss_last_end = suspend_stats.tss_last_end;
3801 suspension_info.tss_last_start = suspend_stats.tss_last_start;
3802 ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
3803 if (ret != KERN_SUCCESS) {
3804 return ret;
3805 }
3806
3807 ret = task_get_suspend_sources_kdp(task, suspend_sources);
3808 if (ret != KERN_SUCCESS) {
3809 return ret;
3810 }
3811
3812 for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
3813 suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
3814 strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspend_sources[i].tss_procname));
3815 suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
3816 suspension_sources[i].tss_time = suspend_sources[i].tss_time;
3817 }
3818 return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
3819 }
3820 #endif /* CONFIG_TASK_SUSPEND_STATS */
3821
3822 static kern_return_t
kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd,task_t task,unaligned_u64 task_snap_ss_flags,uint64_t transition_type)3823 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
3824 {
3825 kern_return_t error = KERN_SUCCESS;
3826 mach_vm_address_t out_addr = 0;
3827 struct transitioning_task_snapshot * cur_tsnap = NULL;
3828
3829 int task_pid = pid_from_task(task);
3830 /* Is returning -1 ok for terminating task ok ??? */
3831 uint64_t task_uniqueid = get_task_uniqueid(task);
3832
3833 if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3834 /*
3835 * if this task is a transit task from another one, show the pid as
3836 * negative
3837 */
3838 task_pid = 0 - task_pid;
3839 }
3840
3841 /* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3842 kcdata_compression_window_open(kcd);
3843 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
3844 cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
3845 bzero(cur_tsnap, sizeof(*cur_tsnap));
3846
3847 cur_tsnap->tts_unique_pid = task_uniqueid;
3848 cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
3849 cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
3850 cur_tsnap->tts_transition_type = transition_type;
3851 cur_tsnap->tts_pid = task_pid;
3852
3853 /* Add the BSD process identifiers */
3854 if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
3855 proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
3856 } else {
3857 cur_tsnap->tts_p_comm[0] = '\0';
3858 }
3859
3860 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3861
3862 error_exit:
3863 return error;
3864 }
3865
3866 static kern_return_t
3867 #if STACKSHOT_COLLECTS_LATENCY_INFO
kcdata_record_task_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags,struct stackshot_latency_task * latency_info)3868 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
3869 #else
3870 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3871 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3872 {
3873 bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3874 bool collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
3875 #if CONFIG_PERVASIVE_CPI
3876 bool collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3877 #endif /* CONFIG_PERVASIVE_CPI */
3878 #if __arm64__
3879 bool collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
3880 #endif
3881 bool collect_pagetables = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
3882
3883
3884 kern_return_t error = KERN_SUCCESS;
3885 mach_vm_address_t out_addr = 0;
3886 struct task_snapshot_v2 * cur_tsnap = NULL;
3887 #if STACKSHOT_COLLECTS_LATENCY_INFO
3888 latency_info->cur_tsnap_latency = mach_absolute_time();
3889 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3890
3891 int task_pid = pid_from_task(task);
3892 uint64_t task_uniqueid = get_task_uniqueid(task);
3893 void *bsd_info = get_bsdtask_info(task);
3894 uint64_t proc_starttime_secs = 0;
3895
3896 if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3897 /*
3898 * if this task is a transit task from another one, show the pid as
3899 * negative
3900 */
3901 task_pid = 0 - task_pid;
3902 }
3903
3904 /* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3905 kcdata_compression_window_open(kcd);
3906 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
3907 cur_tsnap = (struct task_snapshot_v2 *)out_addr;
3908 bzero(cur_tsnap, sizeof(*cur_tsnap));
3909
3910 cur_tsnap->ts_unique_pid = task_uniqueid;
3911 cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
3912 cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
3913
3914 struct recount_usage term_usage = { 0 };
3915 recount_task_terminated_usage(task, &term_usage);
3916 struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
3917 cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
3918 cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
3919
3920 proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
3921 cur_tsnap->ts_p_start_sec = proc_starttime_secs;
3922 cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3923 cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
3924 cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
3925 cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
3926
3927 cur_tsnap->ts_suspend_count = task->suspend_count;
3928 cur_tsnap->ts_faults = counter_load(&task->faults);
3929 cur_tsnap->ts_pageins = counter_load(&task->pageins);
3930 cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
3931 cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
3932 LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3933 cur_tsnap->ts_pid = task_pid;
3934
3935 /* Add the BSD process identifiers */
3936 if (task_pid != -1 && bsd_info != NULL) {
3937 proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
3938 } else {
3939 cur_tsnap->ts_p_comm[0] = '\0';
3940 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
3941 if (task->task_imp_base != NULL) {
3942 kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
3943 MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
3944 }
3945 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
3946 }
3947
3948 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3949
3950 #if CONFIG_COALITIONS
3951 if (task_pid != -1 && bsd_info != NULL &&
3952 (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
3953 /*
3954 * The jetsam coalition ID is always saved, even if
3955 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
3956 */
3957 uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
3958 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
3959 }
3960 #endif /* CONFIG_COALITIONS */
3961
3962 #if __arm64__
3963 if (collect_asid && have_pmap) {
3964 uint32_t asid = PMAP_VASID(task->map->pmap);
3965 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
3966 }
3967 #endif
3968
3969 #if STACKSHOT_COLLECTS_LATENCY_INFO
3970 latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
3971 latency_info->pmap_latency = mach_absolute_time();
3972 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3973
3974 if (collect_pagetables && have_pmap) {
3975 #if SCHED_HYGIENE_DEBUG
3976 // pagetable dumps can be large; reset the interrupt timeout to avoid a panic
3977 ml_spin_debug_clear_self();
3978 #endif
3979 assert(stackshot_ctx.sc_is_singlethreaded);
3980 size_t bytes_dumped = 0;
3981 error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stackshot_args.pagetable_mask, &bytes_dumped);
3982 if (error != KERN_SUCCESS) {
3983 goto error_exit;
3984 } else {
3985 /* Variable size array - better not have it on the stack. */
3986 kcdata_compression_window_open(kcd);
3987 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
3988 sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
3989 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3990 }
3991 }
3992
3993 #if STACKSHOT_COLLECTS_LATENCY_INFO
3994 latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
3995 latency_info->bsd_proc_ids_latency = mach_absolute_time();
3996 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3997
3998 #if STACKSHOT_COLLECTS_LATENCY_INFO
3999 latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
4000 latency_info->end_latency = mach_absolute_time();
4001 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4002
4003 if (collect_iostats) {
4004 kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
4005 }
4006
4007 #if CONFIG_PERVASIVE_CPI
4008 if (collect_instrs_cycles) {
4009 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4010 }
4011 #endif /* CONFIG_PERVASIVE_CPI */
4012
4013 kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
4014 kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
4015 kcd_exit_on_error(kcdata_record_task_jit_address_range(kcd, task));
4016
4017 #if CONFIG_TASK_SUSPEND_STATS
4018 kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
4019 #endif /* CONFIG_TASK_SUSPEND_STATS */
4020
4021 #if STACKSHOT_COLLECTS_LATENCY_INFO
4022 latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
4023 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4024
4025 error_exit:
4026 return error;
4027 }
4028
4029 static kern_return_t
kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd,task_t task,uint64_t trace_flags,boolean_t have_pmap,unaligned_u64 task_snap_ss_flags)4030 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
4031 {
4032 #if !CONFIG_PERVASIVE_CPI
4033 #pragma unused(trace_flags)
4034 #endif /* !CONFIG_PERVASIVE_CPI */
4035 kern_return_t error = KERN_SUCCESS;
4036 struct task_delta_snapshot_v2 * cur_tsnap = NULL;
4037 mach_vm_address_t out_addr = 0;
4038 (void) trace_flags;
4039 #if __arm64__
4040 boolean_t collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
4041 #endif
4042 #if CONFIG_PERVASIVE_CPI
4043 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4044 #endif /* CONFIG_PERVASIVE_CPI */
4045
4046 uint64_t task_uniqueid = get_task_uniqueid(task);
4047
4048 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
4049
4050 cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
4051
4052 cur_tsnap->tds_unique_pid = task_uniqueid;
4053 cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
4054 cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
4055
4056 struct recount_usage usage = { 0 };
4057 recount_task_terminated_usage(task, &usage);
4058 struct recount_times_mach term_times = recount_usage_times_mach(&usage);
4059
4060 cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
4061 cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
4062
4063 cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
4064
4065 cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
4066 cur_tsnap->tds_suspend_count = task->suspend_count;
4067 cur_tsnap->tds_faults = counter_load(&task->faults);
4068 cur_tsnap->tds_pageins = counter_load(&task->pageins);
4069 cur_tsnap->tds_cow_faults = counter_load(&task->cow_faults);
4070 cur_tsnap->tds_was_throttled = (uint32_t)proc_was_throttled_from_task(task);
4071 cur_tsnap->tds_did_throttle = (uint32_t)proc_did_throttle_from_task(task);
4072 cur_tsnap->tds_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
4073 ? LATENCY_QOS_TIER_UNSPECIFIED
4074 : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
4075
4076 #if __arm64__
4077 if (collect_asid && have_pmap) {
4078 uint32_t asid = PMAP_VASID(task->map->pmap);
4079 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
4080 kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
4081 }
4082 #endif
4083
4084 #if CONFIG_PERVASIVE_CPI
4085 if (collect_instrs_cycles) {
4086 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4087 }
4088 #endif /* CONFIG_PERVASIVE_CPI */
4089
4090 error_exit:
4091 return error;
4092 }
4093
4094 static kern_return_t
kcdata_record_thread_iostats(kcdata_descriptor_t kcd,thread_t thread)4095 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
4096 {
4097 kern_return_t error = KERN_SUCCESS;
4098 mach_vm_address_t out_addr = 0;
4099
4100 /* I/O Statistics */
4101 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
4102 if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
4103 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
4104 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
4105 _iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
4106 _iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
4107 _iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
4108 _iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
4109 _iostat->ss_paging_count = thread->thread_io_stats->paging.count;
4110 _iostat->ss_paging_size = thread->thread_io_stats->paging.size;
4111 _iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
4112 _iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
4113 _iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
4114 _iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
4115 _iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
4116 _iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
4117 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
4118 _iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
4119 _iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
4120 }
4121 }
4122
4123 error_exit:
4124 return error;
4125 }
4126
4127 bool
machine_trace_thread_validate_kva(vm_offset_t addr)4128 machine_trace_thread_validate_kva(vm_offset_t addr)
4129 {
4130 return _stackshot_validate_kva(addr, sizeof(uintptr_t));
4131 }
4132
4133 struct _stackshot_backtrace_context {
4134 vm_map_t sbc_map;
4135 vm_offset_t sbc_prev_page;
4136 vm_offset_t sbc_prev_kva;
4137 uint32_t sbc_flags;
4138 bool sbc_allow_faulting;
4139 };
4140
4141 static errno_t
_stackshot_backtrace_copy(void * vctx,void * dst,user_addr_t src,size_t size)4142 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
4143 {
4144 struct _stackshot_backtrace_context *ctx = vctx;
4145 size_t map_page_mask = 0;
4146 size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
4147 &map_page_mask);
4148 assert(size < map_page_size);
4149 if (src & (size - 1)) {
4150 // The source should be aligned to the size passed in, like a stack
4151 // frame or word.
4152 return EINVAL;
4153 }
4154
4155 vm_offset_t src_page = src & ~map_page_mask;
4156 vm_offset_t src_kva = 0;
4157
4158 if (src_page != ctx->sbc_prev_page) {
4159 uint32_t res = 0;
4160 uint32_t flags = 0;
4161 vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
4162 ctx->sbc_allow_faulting, &res);
4163
4164 flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
4165 flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
4166 flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
4167 ctx->sbc_flags |= flags;
4168 if (src_pa == 0) {
4169 return EFAULT;
4170 }
4171
4172 src_kva = phystokv(src_pa);
4173 ctx->sbc_prev_page = src_page;
4174 ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
4175 } else {
4176 src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
4177 }
4178
4179 #if KASAN
4180 /*
4181 * KASan does not monitor accesses to userspace pages. Therefore, it is
4182 * pointless to maintain a shadow map for them. Instead, they are all
4183 * mapped to a single, always valid shadow map page. This approach saves
4184 * a considerable amount of shadow map pages which are limited and
4185 * precious.
4186 */
4187 kasan_notify_address_nopoison(src_kva, size);
4188 #endif
4189
4190 memcpy(dst, (const void *)src_kva, size);
4191
4192 return 0;
4193 }
4194
4195 static kern_return_t
kcdata_record_thread_snapshot(kcdata_descriptor_t kcd,thread_t thread,task_t task,uint64_t trace_flags,boolean_t have_pmap,boolean_t thread_on_core)4196 kcdata_record_thread_snapshot(kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
4197 {
4198 boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
4199 boolean_t active_kthreads_only_p = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4200 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4201 boolean_t collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
4202 #if CONFIG_PERVASIVE_CPI
4203 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4204 #endif /* CONFIG_PERVASIVE_CPI */
4205 kern_return_t error = KERN_SUCCESS;
4206
4207 #if STACKSHOT_COLLECTS_LATENCY_INFO
4208 struct stackshot_latency_thread latency_info;
4209 latency_info.cur_thsnap1_latency = mach_absolute_time();
4210 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4211
4212 mach_vm_address_t out_addr = 0;
4213 int saved_count = 0;
4214
4215 struct thread_snapshot_v4 * cur_thread_snap = NULL;
4216 char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
4217
4218 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
4219 cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
4220
4221 /* Populate the thread snapshot header */
4222 cur_thread_snap->ths_ss_flags = 0;
4223 cur_thread_snap->ths_thread_id = thread_tid(thread);
4224 cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
4225 cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
4226 cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
4227
4228 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4229 cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4230 } else {
4231 cur_thread_snap->ths_voucher_identifier = 0;
4232 }
4233
4234 #if STACKSHOT_COLLECTS_LATENCY_INFO
4235 latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
4236 latency_info.dispatch_serial_latency = mach_absolute_time();
4237 latency_info.dispatch_label_latency = 0;
4238 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4239
4240 cur_thread_snap->ths_dqserialnum = 0;
4241 if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
4242 uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
4243 if (dqkeyaddr != 0) {
4244 uint64_t dqaddr = 0;
4245 boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
4246 if (copyin_ok && dqaddr != 0) {
4247 uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
4248 uint64_t dqserialnum = 0;
4249 copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
4250 if (copyin_ok) {
4251 cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
4252 cur_thread_snap->ths_dqserialnum = dqserialnum;
4253 }
4254
4255 #if STACKSHOT_COLLECTS_LATENCY_INFO
4256 latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
4257 latency_info.dispatch_label_latency = mach_absolute_time();
4258 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4259
4260 /* try copying in the queue label */
4261 uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
4262 if (label_offs) {
4263 uint64_t dqlabeladdr = dqaddr + label_offs;
4264 uint64_t actual_dqlabeladdr = 0;
4265
4266 copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
4267 if (copyin_ok && actual_dqlabeladdr != 0) {
4268 char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
4269 int len;
4270
4271 bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
4272 len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
4273 if (len > 0) {
4274 mach_vm_address_t label_addr = 0;
4275 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
4276 kdp_strlcpy((char*)label_addr, &label_buf[0], len);
4277 }
4278 }
4279 }
4280 #if STACKSHOT_COLLECTS_LATENCY_INFO
4281 latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
4282 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4283 }
4284 }
4285 }
4286
4287 #if STACKSHOT_COLLECTS_LATENCY_INFO
4288 if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
4289 latency_info.dispatch_serial_latency = 0;
4290 }
4291 latency_info.cur_thsnap2_latency = mach_absolute_time();
4292 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4293
4294 struct recount_times_mach times = recount_thread_times(thread);
4295 cur_thread_snap->ths_user_time = times.rtm_user;
4296 cur_thread_snap->ths_sys_time = times.rtm_system;
4297
4298 if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
4299 cur_thread_snap->ths_ss_flags |= kThreadMain;
4300 }
4301 if (thread->effective_policy.thep_darwinbg) {
4302 cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
4303 }
4304 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4305 cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
4306 }
4307 if (thread->suspend_count > 0) {
4308 cur_thread_snap->ths_ss_flags |= kThreadSuspended;
4309 }
4310 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4311 cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
4312 }
4313 #if CONFIG_EXCLAVES
4314 /* save exclave thread for later collection */
4315 if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !stackshot_ctx.sc_panic_stackshot) {
4316 /* certain threads, like the collector, must never be inspected */
4317 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
4318 uint32_t ctid_index = os_atomic_inc_orig(&stackshot_exclave_inspect_ctid_count, acq_rel);
4319 if (ctid_index < stackshot_exclave_inspect_ctid_capacity) {
4320 stackshot_exclave_inspect_ctids[ctid_index] = thread_get_ctid(thread);
4321 } else {
4322 os_atomic_store(&stackshot_exclave_inspect_ctid_count, stackshot_exclave_inspect_ctid_capacity, release);
4323 }
4324 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
4325 panic("stackshot: trying to inspect already-queued thread");
4326 }
4327 }
4328 }
4329 #endif /* CONFIG_EXCLAVES */
4330 if (thread_on_core) {
4331 cur_thread_snap->ths_ss_flags |= kThreadOnCore;
4332 }
4333 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4334 cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
4335 }
4336
4337 /* make sure state flags defined in kcdata.h still match internal flags */
4338 static_assert(SS_TH_WAIT == TH_WAIT);
4339 static_assert(SS_TH_SUSP == TH_SUSP);
4340 static_assert(SS_TH_RUN == TH_RUN);
4341 static_assert(SS_TH_UNINT == TH_UNINT);
4342 static_assert(SS_TH_TERMINATE == TH_TERMINATE);
4343 static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
4344 static_assert(SS_TH_IDLE == TH_IDLE);
4345
4346 cur_thread_snap->ths_last_run_time = thread->last_run_time;
4347 cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
4348 cur_thread_snap->ths_state = thread->state;
4349 cur_thread_snap->ths_sched_flags = thread->sched_flags;
4350 cur_thread_snap->ths_base_priority = thread->base_pri;
4351 cur_thread_snap->ths_sched_priority = thread->sched_pri;
4352 cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
4353 cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
4354 cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4355 thread->requested_policy.thrp_qos_workq_override);
4356 cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4357 cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
4358
4359 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4360 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4361 cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4362 cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4363
4364 #if STACKSHOT_COLLECTS_LATENCY_INFO
4365 latency_info.cur_thsnap2_latency = mach_absolute_time() - latency_info.cur_thsnap2_latency;
4366 latency_info.thread_name_latency = mach_absolute_time();
4367 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4368
4369 /* if there is thread name then add to buffer */
4370 cur_thread_name[0] = '\0';
4371 proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
4372 if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
4373 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
4374 kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
4375 }
4376
4377 #if STACKSHOT_COLLECTS_LATENCY_INFO
4378 latency_info.thread_name_latency = mach_absolute_time() - latency_info.thread_name_latency;
4379 latency_info.sur_times_latency = mach_absolute_time();
4380 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4381
4382 /* record system, user, and runnable times */
4383 time_value_t runnable_time;
4384 thread_read_times(thread, NULL, NULL, &runnable_time);
4385 clock_sec_t user_sec = 0, system_sec = 0;
4386 clock_usec_t user_usec = 0, system_usec = 0;
4387 absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
4388 absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
4389
4390 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
4391 struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
4392 *stackshot_cpu_times = (struct stackshot_cpu_times_v2){
4393 .user_usec = user_sec * USEC_PER_SEC + user_usec,
4394 .system_usec = system_sec * USEC_PER_SEC + system_usec,
4395 .runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
4396 };
4397
4398 #if STACKSHOT_COLLECTS_LATENCY_INFO
4399 latency_info.sur_times_latency = mach_absolute_time() - latency_info.sur_times_latency;
4400 latency_info.user_stack_latency = mach_absolute_time();
4401 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4402
4403 /* Trace user stack, if any */
4404 if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
4405 uint32_t user_ths_ss_flags = 0;
4406
4407 /*
4408 * We don't know how big the stacktrace will be, so read it into our
4409 * per-cpu buffer, then copy it to the kcdata.
4410 */
4411 struct _stackshot_backtrace_context ctx = {
4412 .sbc_map = task->map,
4413 .sbc_allow_faulting = stackshot_ctx.sc_enable_faulting,
4414 .sbc_prev_page = -1,
4415 .sbc_prev_kva = -1,
4416 };
4417 struct backtrace_control ctl = {
4418 .btc_user_thread = thread,
4419 .btc_user_copy = _stackshot_backtrace_copy,
4420 .btc_user_copy_context = &ctx,
4421 };
4422 struct backtrace_user_info info = BTUINFO_INIT;
4423
4424 saved_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4425 &info);
4426 if (saved_count > 0) {
4427 #if __LP64__
4428 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
4429 #else // __LP64__
4430 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
4431 #endif // !__LP64__
4432 /* Now, copy the stacktrace into kcdata. */
4433 kcd_exit_on_error(kcdata_push_array(kcd, STACKLR_WORDS, sizeof(uintptr_t),
4434 saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4435 if (info.btui_info & BTI_64_BIT) {
4436 user_ths_ss_flags |= kUser64_p;
4437 }
4438 if ((info.btui_info & BTI_TRUNCATED) ||
4439 (ctx.sbc_flags & kThreadTruncatedBT)) {
4440 user_ths_ss_flags |= kThreadTruncatedBT;
4441 user_ths_ss_flags |= kThreadTruncUserBT;
4442 }
4443 user_ths_ss_flags |= ctx.sbc_flags;
4444 ctx.sbc_flags = 0;
4445 #if __LP64__
4446 /* We only support async stacks on 64-bit kernels */
4447 if (info.btui_async_frame_addr != 0) {
4448 uint32_t async_start_offset = info.btui_async_start_index;
4449 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
4450 sizeof(async_start_offset), &async_start_offset));
4451 ctl.btc_frame_addr = info.btui_async_frame_addr;
4452 ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
4453 info = BTUINFO_INIT;
4454 unsigned int async_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4455 &info);
4456 if (async_count > 0) {
4457 kcd_exit_on_error(kcdata_push_array(kcd, STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64,
4458 sizeof(uintptr_t), async_count, stackshot_cpu_ctx.scc_stack_buffer));
4459 if ((info.btui_info & BTI_TRUNCATED) ||
4460 (ctx.sbc_flags & kThreadTruncatedBT)) {
4461 user_ths_ss_flags |= kThreadTruncatedBT;
4462 user_ths_ss_flags |= kThreadTruncUserAsyncBT;
4463 }
4464 user_ths_ss_flags |= ctx.sbc_flags;
4465 }
4466 }
4467 #endif /* _LP64 */
4468 }
4469 if (user_ths_ss_flags != 0) {
4470 cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
4471 }
4472 }
4473
4474 #if STACKSHOT_COLLECTS_LATENCY_INFO
4475 latency_info.user_stack_latency = mach_absolute_time() - latency_info.user_stack_latency;
4476 latency_info.kernel_stack_latency = mach_absolute_time();
4477 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4478
4479 /* Call through to the machine specific trace routines
4480 * Frames are added past the snapshot header.
4481 */
4482 if (thread->kernel_stack != 0) {
4483 uint32_t kern_ths_ss_flags = 0;
4484 #if defined(__LP64__)
4485 uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
4486 extern int machine_trace_thread64(thread_t thread, char *tracepos,
4487 char *tracebound, int nframes, uint32_t *thread_trace_flags);
4488 saved_count = machine_trace_thread64(
4489 #else
4490 uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
4491 extern int machine_trace_thread(thread_t thread, char *tracepos,
4492 char *tracebound, int nframes, uint32_t *thread_trace_flags);
4493 saved_count = machine_trace_thread(
4494 #endif
4495 thread, (char*) stackshot_cpu_ctx.scc_stack_buffer,
4496 (char *) (stackshot_cpu_ctx.scc_stack_buffer + MAX_FRAMES), MAX_FRAMES,
4497 &kern_ths_ss_flags);
4498 if (saved_count > 0) {
4499 int frame_size = sizeof(uintptr_t);
4500 #if defined(__LP64__)
4501 cur_thread_snap->ths_ss_flags |= kKernel64_p;
4502 #endif
4503 #if CONFIG_EXCLAVES
4504 if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
4505 struct thread_exclaves_info info = { 0 };
4506
4507 info.tei_flags = kExclaveRPCActive;
4508 if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
4509 info.tei_flags |= kExclaveSchedulerRequest;
4510 }
4511 if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
4512 info.tei_flags |= kExclaveUpcallActive;
4513 }
4514 info.tei_scid = thread->th_exclaves_ipc_ctx.scid;
4515 info.tei_thread_offset = exclaves_stack_offset(stackshot_cpu_ctx.scc_stack_buffer, saved_count / frame_size, false);
4516
4517 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
4518 }
4519 #endif /* CONFIG_EXCLAVES */
4520 kcd_exit_on_error(kcdata_push_array(kcd, stack_kcdata_type,
4521 frame_size, saved_count / frame_size, stackshot_cpu_ctx.scc_stack_buffer));
4522 }
4523 if (kern_ths_ss_flags & kThreadTruncatedBT) {
4524 kern_ths_ss_flags |= kThreadTruncKernBT;
4525 }
4526 if (kern_ths_ss_flags != 0) {
4527 cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
4528 }
4529 }
4530
4531 #if STACKSHOT_COLLECTS_LATENCY_INFO
4532 latency_info.kernel_stack_latency = mach_absolute_time() - latency_info.kernel_stack_latency;
4533 latency_info.misc_latency = mach_absolute_time();
4534 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4535
4536 #if CONFIG_THREAD_GROUPS
4537 if (trace_flags & STACKSHOT_THREAD_GROUP) {
4538 uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
4539 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
4540 kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
4541 }
4542 #endif /* CONFIG_THREAD_GROUPS */
4543
4544 if (collect_iostats) {
4545 kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
4546 }
4547
4548 #if CONFIG_PERVASIVE_CPI
4549 if (collect_instrs_cycles) {
4550 struct recount_usage usage = { 0 };
4551 recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
4552 &usage);
4553
4554 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
4555 struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
4556 instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
4557 instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
4558 }
4559 #endif /* CONFIG_PERVASIVE_CPI */
4560
4561 #if STACKSHOT_COLLECTS_LATENCY_INFO
4562 latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4563 if (collect_latency_info) {
4564 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
4565 }
4566 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4567
4568 error_exit:
4569 return error;
4570 }
4571
4572 static int
kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap,thread_t thread,boolean_t thread_on_core)4573 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
4574 {
4575 cur_thread_snap->tds_thread_id = thread_tid(thread);
4576 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4577 cur_thread_snap->tds_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4578 } else {
4579 cur_thread_snap->tds_voucher_identifier = 0;
4580 }
4581
4582 cur_thread_snap->tds_ss_flags = 0;
4583 if (thread->effective_policy.thep_darwinbg) {
4584 cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
4585 }
4586 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4587 cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
4588 }
4589 if (thread->suspend_count > 0) {
4590 cur_thread_snap->tds_ss_flags |= kThreadSuspended;
4591 }
4592 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4593 cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
4594 }
4595 if (thread_on_core) {
4596 cur_thread_snap->tds_ss_flags |= kThreadOnCore;
4597 }
4598 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4599 cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
4600 }
4601
4602 cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
4603 cur_thread_snap->tds_state = thread->state;
4604 cur_thread_snap->tds_sched_flags = thread->sched_flags;
4605 cur_thread_snap->tds_base_priority = thread->base_pri;
4606 cur_thread_snap->tds_sched_priority = thread->sched_pri;
4607 cur_thread_snap->tds_eqos = thread->effective_policy.thep_qos;
4608 cur_thread_snap->tds_rqos = thread->requested_policy.thrp_qos;
4609 cur_thread_snap->tds_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4610 thread->requested_policy.thrp_qos_workq_override);
4611 cur_thread_snap->tds_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4612
4613 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4614 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4615 cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4616 cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4617
4618 return 0;
4619 }
4620
/*
 * Why 12?  12 strikes a decent balance between allocating a large array on
 * the stack and having large kcdata item overheads for recording nonrunnable
 * tasks.
 */
#define UNIQUEIDSPERFLUSH 12

/*
 * Fixed-capacity batch of 64-bit unique ids.
 */
struct saved_uniqueids {
	/* storage for up to UNIQUEIDSPERFLUSH ids */
	uint64_t ids[UNIQUEIDSPERFLUSH];
	/* presumably the number of slots of ids[] currently in use - confirm at the use sites */
	unsigned count;
};
4632
/*
 * Verdict returned by classify_thread(): which kind of record a thread gets.
 */
enum thread_classification {
	/* take a full snapshot */
	tc_full_snapshot,
	/* take a delta snapshot */
	tc_delta_snapshot,
};
4637
4638 static enum thread_classification
classify_thread(thread_t thread,boolean_t * thread_on_core_p,boolean_t collect_delta_stackshot)4639 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
4640 {
4641 processor_t last_processor = thread->last_processor;
4642
4643 boolean_t thread_on_core = FALSE;
4644 if (last_processor != PROCESSOR_NULL) {
4645 /* Idle threads are always treated as on-core, since the processor state can change while they are running. */
4646 thread_on_core = (thread == last_processor->idle_thread) ||
4647 (last_processor->state == PROCESSOR_RUNNING &&
4648 last_processor->active_thread == thread);
4649 }
4650
4651 *thread_on_core_p = thread_on_core;
4652
4653 /* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
4654 * previous full stackshot */
4655 if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stackshot_args.since_timestamp)) {
4656 return tc_full_snapshot;
4657 } else {
4658 return tc_delta_snapshot;
4659 }
4660 }
4661
4662
4663 static kern_return_t
kdp_stackshot_record_task(task_t task)4664 kdp_stackshot_record_task(task_t task)
4665 {
4666 boolean_t active_kthreads_only_p = ((stackshot_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4667 boolean_t save_donating_pids_p = ((stackshot_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
4668 boolean_t collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4669 boolean_t save_owner_info = ((stackshot_flags & STACKSHOT_THREAD_WAITINFO) != 0);
4670 boolean_t include_drivers = ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
4671
4672 kern_return_t error = KERN_SUCCESS;
4673 mach_vm_address_t out_addr = 0;
4674 int saved_count = 0;
4675
4676 int task_pid = 0;
4677 uint64_t task_uniqueid = 0;
4678 int num_delta_thread_snapshots = 0;
4679 int num_waitinfo_threads = 0;
4680 int num_turnstileinfo_threads = 0;
4681
4682 uint64_t task_start_abstime = 0;
4683 boolean_t have_map = FALSE, have_pmap = FALSE;
4684 boolean_t some_thread_ran = FALSE;
4685 unaligned_u64 task_snap_ss_flags = 0;
4686 #if STACKSHOT_COLLECTS_LATENCY_INFO
4687 struct stackshot_latency_task latency_info;
4688 latency_info.setup_latency = mach_absolute_time();
4689 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4690
4691 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4692 uint64_t task_begin_cpu_cycle_count = 0;
4693 if (!stackshot_ctx.sc_panic_stackshot) {
4694 task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
4695 }
4696 #endif
4697
4698 if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
4699 error = KERN_FAILURE;
4700 goto error_exit;
4701 }
4702
4703 void *bsd_info = get_bsdtask_info(task);
4704 boolean_t task_in_teardown = (bsd_info == NULL) || proc_in_teardown(bsd_info);// has P_LPEXIT set during proc_exit()
4705 boolean_t task_in_transition = task_in_teardown; // here we can add other types of transition.
4706 uint32_t container_type = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
4707 uint32_t transition_type = (task_in_teardown) ? kTaskIsTerminated : 0;
4708
4709 if (task_in_transition) {
4710 collect_delta_stackshot = FALSE;
4711 }
4712
4713 have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
4714 have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
4715
4716 task_pid = pid_from_task(task);
4717 /* Is returning -1 ok for terminating task ok ??? */
4718 task_uniqueid = get_task_uniqueid(task);
4719
4720 if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
4721 /*
4722 * Not interested in terminated tasks without threads.
4723 */
4724 if (queue_empty(&task->threads) || task_pid == -1) {
4725 return KERN_SUCCESS;
4726 }
4727 }
4728
4729 /* All PIDs should have the MSB unset */
4730 assert((task_pid & (1ULL << 31)) == 0);
4731
4732 #if STACKSHOT_COLLECTS_LATENCY_INFO
4733 latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
4734 latency_info.task_uniqueid = task_uniqueid;
4735 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4736
4737 /* Trace everything, unless a process was specified. Add in driver tasks if requested. */
4738 if ((stackshot_args.pid == -1) || (stackshot_args.pid == task_pid) || (include_drivers && task_is_driver(task))) {
4739 #if STACKSHOT_COLLECTS_LATENCY_INFO
4740 stackshot_cpu_latency.tasks_processed++;
4741 #endif
4742
4743 /* add task snapshot marker */
4744 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4745 container_type, task_uniqueid));
4746
4747 if (collect_delta_stackshot) {
4748 /*
4749 * For delta stackshots we need to know if a thread from this task has run since the
4750 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
4751 */
4752 thread_t thread = THREAD_NULL;
4753 queue_iterate(&task->threads, thread, thread_t, task_threads)
4754 {
4755 if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4756 error = KERN_FAILURE;
4757 goto error_exit;
4758 }
4759
4760 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4761 continue;
4762 }
4763
4764 boolean_t thread_on_core;
4765 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4766
4767 switch (thread_classification) {
4768 case tc_full_snapshot:
4769 some_thread_ran = TRUE;
4770 break;
4771 case tc_delta_snapshot:
4772 num_delta_thread_snapshots++;
4773 break;
4774 }
4775 }
4776 }
4777
4778 if (collect_delta_stackshot) {
4779 proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
4780 }
4781
4782 /* Next record any relevant UUID info and store the task snapshot */
4783 if (task_in_transition ||
4784 !collect_delta_stackshot ||
4785 (task_start_abstime == 0) ||
4786 (task_start_abstime > stackshot_args.since_timestamp) ||
4787 some_thread_ran) {
4788 /*
4789 * Collect full task information in these scenarios:
4790 *
4791 * 1) a full stackshot or the task is in transition
4792 * 2) a delta stackshot where the task started after the previous full stackshot
4793 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
4794 *
4795 * because the task may have exec'ed, changing its name, architecture, load info, etc
4796 */
4797
4798 kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
4799 kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, stackshot_flags, have_pmap, &task_snap_ss_flags));
4800 kcd_exit_on_error(kcdata_record_task_exec_meta(stackshot_kcdata_p, task));
4801 #if STACKSHOT_COLLECTS_LATENCY_INFO
4802 if (!task_in_transition) {
4803 kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags, &latency_info));
4804 } else {
4805 kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4806 }
4807 #else
4808 if (!task_in_transition) {
4809 kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4810 } else {
4811 kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4812 }
4813 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4814 } else {
4815 kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4816 }
4817
4818 #if STACKSHOT_COLLECTS_LATENCY_INFO
4819 latency_info.misc_latency = mach_absolute_time();
4820 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4821
4822 struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
4823 int current_delta_snapshot_index = 0;
4824 if (num_delta_thread_snapshots > 0) {
4825 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
4826 sizeof(struct thread_delta_snapshot_v3),
4827 num_delta_thread_snapshots, &out_addr));
4828 delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
4829 }
4830
4831
4832 #if STACKSHOT_COLLECTS_LATENCY_INFO
4833 latency_info.task_thread_count_loop_latency = mach_absolute_time();
4834 #endif
4835 /*
4836 * Iterate over the task threads to save thread snapshots and determine
4837 * how much space we need for waitinfo and turnstile info
4838 */
4839 thread_t thread = THREAD_NULL;
4840 queue_iterate(&task->threads, thread, thread_t, task_threads)
4841 {
4842 if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4843 error = KERN_FAILURE;
4844 goto error_exit;
4845 }
4846
4847 uint64_t thread_uniqueid;
4848 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4849 continue;
4850 }
4851 thread_uniqueid = thread_tid(thread);
4852
4853 boolean_t thread_on_core;
4854 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4855
4856 #if STACKSHOT_COLLECTS_LATENCY_INFO
4857 stackshot_cpu_latency.threads_processed++;
4858 #endif
4859
4860 switch (thread_classification) {
4861 case tc_full_snapshot:
4862 /* add thread marker */
4863 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4864 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4865
4866 /* thread snapshot can be large, including strings, avoid overflowing the stack. */
4867 kcdata_compression_window_open(stackshot_kcdata_p);
4868
4869 kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, stackshot_flags, have_pmap, thread_on_core));
4870
4871 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4872
4873 /* mark end of thread snapshot data */
4874 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4875 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4876 break;
4877 case tc_delta_snapshot:
4878 kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
4879 break;
4880 }
4881
4882 /*
4883 * We want to report owner information regardless of whether a thread
4884 * has changed since the last delta, whether it's a normal stackshot,
4885 * or whether it's nonrunnable
4886 */
4887 if (save_owner_info) {
4888 if (stackshot_thread_has_valid_waitinfo(thread)) {
4889 num_waitinfo_threads++;
4890 }
4891
4892 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4893 num_turnstileinfo_threads++;
4894 }
4895 }
4896 }
4897 #if STACKSHOT_COLLECTS_LATENCY_INFO
4898 latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
4899 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4900
4901 thread_waitinfo_v2_t *thread_waitinfo = NULL;
4902 thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
4903 int current_waitinfo_index = 0;
4904 int current_turnstileinfo_index = 0;
		/* allocate space for the wait and turnstile info */
4906 if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4907 /* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
4908 kcdata_compression_window_open(stackshot_kcdata_p);
4909
4910 if (num_waitinfo_threads > 0) {
4911 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
4912 sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
4913 thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
4914 }
4915
4916 if (num_turnstileinfo_threads > 0) {
4917 /* get space for the turnstile info */
4918 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
4919 sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
4920 thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
4921 }
4922
4923 stackshot_plh_resetgen(); // so we know which portlabel_ids are referenced
4924 }
4925
4926 #if STACKSHOT_COLLECTS_LATENCY_INFO
4927 latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4928 latency_info.task_thread_data_loop_latency = mach_absolute_time();
4929 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4930
4931 /* Iterate over the task's threads to save the wait and turnstile info */
4932 queue_iterate(&task->threads, thread, thread_t, task_threads)
4933 {
4934 uint64_t thread_uniqueid;
4935 #pragma unused(thread_uniqueid)
4936
4937 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4938 continue;
4939 }
4940
4941 thread_uniqueid = thread_tid(thread);
4942
4943 /* If we want owner info, we should capture it regardless of its classification */
4944 if (save_owner_info) {
4945 if (stackshot_thread_has_valid_waitinfo(thread)) {
4946 stackshot_thread_wait_owner_info(
4947 thread,
4948 &thread_waitinfo[current_waitinfo_index++]);
4949 }
4950
4951 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4952 stackshot_thread_turnstileinfo(
4953 thread,
4954 &thread_turnstileinfo[current_turnstileinfo_index++]);
4955 }
4956 }
4957 }
4958
4959 #if STACKSHOT_COLLECTS_LATENCY_INFO
4960 latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
4961 latency_info.misc2_latency = mach_absolute_time();
4962 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4963
4964 #if DEBUG || DEVELOPMENT
4965 if (current_delta_snapshot_index != num_delta_thread_snapshots) {
4966 panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4967 num_delta_thread_snapshots, current_delta_snapshot_index);
4968 }
4969 if (current_waitinfo_index != num_waitinfo_threads) {
4970 panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4971 num_waitinfo_threads, current_waitinfo_index);
4972 }
4973 #endif
4974
4975 if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4976 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4977 // now, record the portlabel hashes.
4978 kcd_exit_on_error(kdp_stackshot_plh_record());
4979 }
4980
4981 #if IMPORTANCE_INHERITANCE
4982 if (save_donating_pids_p) {
4983 /* Ensure the buffer is big enough, since we're using the stack buffer for this. */
4984 static_assert(TASK_IMP_WALK_LIMIT * sizeof(int32_t) <= MAX_FRAMES * sizeof(uintptr_t));
4985 saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
4986 (char*) stackshot_cpu_ctx.scc_stack_buffer, TASK_IMP_WALK_LIMIT);
4987 if (saved_count > 0) {
4988 /* Variable size array - better not have it on the stack. */
4989 kcdata_compression_window_open(stackshot_kcdata_p);
4990 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
4991 sizeof(int32_t), saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4992 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4993 }
4994 }
4995 #endif
4996
4997 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4998 if (!stackshot_ctx.sc_panic_stackshot) {
4999 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
5000 "task_cpu_cycle_count"));
5001 }
5002 #endif
5003
5004 #if STACKSHOT_COLLECTS_LATENCY_INFO
5005 latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
5006 if (collect_latency_info) {
5007 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
5008 }
5009 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5010
5011 /* mark end of task snapshot data */
5012 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
5013 task_uniqueid));
5014 }
5015
5016
5017 error_exit:
5018 return error;
5019 }
5020
5021 /* Record global shared regions */
5022 static kern_return_t
kdp_stackshot_shared_regions(uint64_t trace_flags)5023 kdp_stackshot_shared_regions(uint64_t trace_flags)
5024 {
5025 kern_return_t error = KERN_SUCCESS;
5026
5027 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5028 extern queue_head_t vm_shared_region_queue;
5029 vm_shared_region_t sr;
5030
5031 extern queue_head_t vm_shared_region_queue;
5032 queue_iterate(&vm_shared_region_queue,
5033 sr,
5034 vm_shared_region_t,
5035 sr_q) {
5036 struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
5037 if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
5038 break;
5039 }
5040 if (collect_delta_stackshot && sr->sr_install_time < stackshot_args.since_timestamp) {
5041 continue; // only include new shared caches in delta stackshots
5042 }
5043 uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
5044 (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
5045 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5046 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
5047 kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
5048 scinfo.sharedCacheSlide = sr->sr_slide;
5049 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
5050 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5051 scinfo.sharedCacheID = sr->sr_id;
5052 scinfo.sharedCacheFlags = sharedCacheFlags;
5053
5054 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
5055 sizeof(scinfo), &scinfo));
5056
5057 if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
5058 _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
5059 assert(sr->sr_images_count != 0);
5060 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5061 }
5062 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5063 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
5064 }
5065
5066 /*
5067 * For backwards compatibility; this will eventually be removed.
5068 * Another copy of the Primary System Shared Region, for older readers.
5069 */
5070 sr = primary_system_shared_region;
5071 /* record system level shared cache load info (if available) */
5072 if (!collect_delta_stackshot && sr &&
5073 _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
5074 struct dyld_shared_cache_loadinfo scinfo = {0};
5075
5076 /*
5077 * Historically, this data was in a dyld_uuid_info_64 structure, but the
5078 * naming of both the structure and fields for this use isn't great. The
5079 * dyld_shared_cache_loadinfo structure has better names, but the same
5080 * layout and content as the original.
5081 *
5082 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
5083 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
5084 * entries; here, it's the slid base address, and we leave it that way
5085 * for backwards compatibility.
5086 */
5087 kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
5088 scinfo.sharedCacheSlide = sr->sr_slide;
5089 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
5090 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5091
5092 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
5093 sizeof(scinfo), &scinfo));
5094
5095 if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
5096 /*
5097 * Include a map of the system shared cache layout if it has been populated
5098 * (which is only when the system is using a custom shared cache).
5099 */
5100 if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
5101 (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
5102 assert(sr->sr_images_count != 0);
5103 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5104 }
5105 }
5106 }
5107
5108 error_exit:
5109 return error;
5110 }
5111
5112 static kern_return_t
kdp_stackshot_kcdata_format(void)5113 kdp_stackshot_kcdata_format(void)
5114 {
5115 kern_return_t error = KERN_SUCCESS;
5116 mach_vm_address_t out_addr = 0;
5117 uint64_t abs_time = 0;
5118 uint64_t system_state_flags = 0;
5119 task_t task = TASK_NULL;
5120 mach_timebase_info_data_t timebase = {0, 0};
5121 uint32_t length_to_copy = 0, tmp32 = 0;
5122 abs_time = mach_absolute_time();
5123 uint64_t last_task_start_time = 0;
5124 int cur_workitem_index = 0;
5125 uint64_t tasks_in_stackshot = 0;
5126 uint64_t threads_in_stackshot = 0;
5127
5128 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5129 uint64_t stackshot_begin_cpu_cycle_count = 0;
5130
5131 if (!stackshot_ctx.sc_panic_stackshot) {
5132 stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5133 }
5134 #endif
5135
5136 /* the CPU entering here is participating in the stackshot */
5137 stackshot_cpu_ctx.scc_did_work = true;
5138
5139 #if STACKSHOT_COLLECTS_LATENCY_INFO
5140 collect_latency_info = stackshot_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
5141 #endif
5142 /* process the flags */
5143 bool collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5144 bool collect_exclaves = !disable_exclave_stackshot && ((stackshot_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
5145 stackshot_ctx.sc_enable_faulting = (stackshot_flags & (STACKSHOT_ENABLE_BT_FAULTING));
5146
5147 /* Currently we only support returning explicit KEXT load info on fileset kernels */
5148 kc_format_t primary_kc_type = KCFormatUnknown;
5149 if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
5150 stackshot_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
5151 }
5152
5153 if (sizeof(void *) == 8) {
5154 system_state_flags |= kKernel64_p;
5155 }
5156
5157 #if CONFIG_EXCLAVES
5158 if (!stackshot_ctx.sc_panic_stackshot && collect_exclaves) {
5159 kcd_exit_on_error(stackshot_setup_exclave_waitlist()); /* Allocate list of exclave threads */
5160 }
5161 #else
5162 #pragma unused(collect_exclaves)
5163 #endif /* CONFIG_EXCLAVES */
5164
5165 /* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
5166 clock_timebase_info(&timebase);
5167
5168 /* begin saving data into the buffer */
5169 if (stackshot_ctx.sc_bytes_uncompressed) {
5170 stackshot_ctx.sc_bytes_uncompressed = 0;
5171 }
5172
5173 /*
5174 * Setup pre-task linked kcdata buffer.
5175 * The idea here is that we want the kcdata to be in (roughly) the same order as it was
5176 * before we made this multithreaded, so we have separate buffers for pre and post task-iteration,
5177 * since that's the parallelized part.
5178 */
5179 if (!stackshot_ctx.sc_is_singlethreaded) {
5180 kcd_exit_on_error(stackshot_new_linked_kcdata());
5181 stackshot_ctx.sc_pretask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5182 }
5183
5184 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_flags, "stackshot_in_flags"));
5185 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)stackshot_flags, "stackshot_in_pid"));
5186 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
5187 if (stackshot_flags & STACKSHOT_PAGE_TABLES) {
5188 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_args.pagetable_mask, "stackshot_pagetable_mask"));
5189 }
5190 if (stackshot_initial_estimate != 0) {
5191 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
5192 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
5193 }
5194 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_available_task_exec_flags(), "stackshot_te_flags_mask"));
5195
5196
5197 #if STACKSHOT_COLLECTS_LATENCY_INFO
5198 stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time();
5199 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5200
5201 #if CONFIG_JETSAM
5202 tmp32 = memorystatus_get_pressure_status_kdp();
5203 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
5204 #endif
5205
5206 if (!collect_delta_stackshot) {
5207 tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
5208 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
5209
5210 tmp32 = PAGE_SIZE;
5211 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
5212
5213 /* save boot-args and osversion string */
5214 length_to_copy = MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
5215 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
5216 length_to_copy = MIN((uint32_t)(strlen(osversion) + 1), OSVERSIZE);
5217 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OS_BUILD_VERSION, length_to_copy, (void *)osversion));
5218
5219
5220 length_to_copy = MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
5221 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
5222
5223 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
5224 } else {
5225 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stackshot_args.since_timestamp));
5226 }
5227
5228 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
5229
5230 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_ctx.sc_microsecs));
5231
5232 kcd_exit_on_error(kdp_stackshot_shared_regions(stackshot_flags));
5233
5234 /* Add requested information first */
5235 if (stackshot_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
5236 struct mem_and_io_snapshot mais = {0};
5237 kdp_mem_and_io_snapshot(&mais);
5238 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
5239 }
5240
5241
5242 #if CONFIG_THREAD_GROUPS
5243 struct thread_group_snapshot_v3 *thread_groups = NULL;
5244 int num_thread_groups = 0;
5245
5246 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5247 uint64_t thread_group_begin_cpu_cycle_count = 0;
5248
5249 if (!stackshot_ctx.sc_is_singlethreaded && (stackshot_flags & STACKSHOT_THREAD_GROUP)) {
5250 thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5251 }
5252 #endif
5253
5254 /* Iterate over thread group names */
5255 if (stackshot_flags & STACKSHOT_THREAD_GROUP) {
5256 /* Variable size array - better not have it on the stack. */
5257 kcdata_compression_window_open(stackshot_kcdata_p);
5258
5259 if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
5260 stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5261 }
5262
5263 if (num_thread_groups > 0) {
5264 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
5265 thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
5266 }
5267
5268 if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
5269 error = KERN_FAILURE;
5270 goto error_exit;
5271 }
5272
5273 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5274 }
5275
5276 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5277 if (!stackshot_ctx.sc_panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
5278 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
5279 "thread_groups_cpu_cycle_count"));
5280 }
5281 #endif
5282 #else
5283 stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5284 #endif /* CONFIG_THREAD_GROUPS */
5285
5286
5287 #if STACKSHOT_COLLECTS_LATENCY_INFO
5288 stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.setup_latency_mt;
5289 if (stackshot_ctx.sc_is_singlethreaded) {
5290 stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time();
5291 } else {
5292 stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time();
5293 }
5294 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5295
5296 bool const process_scoped = (stackshot_args.pid != -1) &&
5297 ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
5298
5299 /* Iterate over tasks */
5300 queue_iterate(&tasks, task, task_t, tasks)
5301 {
5302 stackshot_panic_guard();
5303
5304 if (collect_delta_stackshot) {
5305 uint64_t abstime;
5306 proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
5307
5308 if (abstime > last_task_start_time) {
5309 last_task_start_time = abstime;
5310 }
5311 }
5312
5313 pid_t task_pid = pid_from_task(task);
5314
5315 if (process_scoped && (task_pid != stackshot_args.pid)) {
5316 continue;
5317 }
5318
5319 if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5320 (!queue_empty(&task->threads) && task_pid != -1)) {
5321 tasks_in_stackshot++;
5322 threads_in_stackshot += task->thread_count;
5323 }
5324
5325 /* If this is a singlethreaded stackshot, don't use the work queues. */
5326 if (stackshot_ctx.sc_is_singlethreaded) {
5327 kcd_exit_on_error(kdp_stackshot_record_task(task));
5328 } else {
5329 kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5330 .sswi_task = task,
5331 .sswi_data = NULL,
5332 .sswi_idx = cur_workitem_index++
5333 }));
5334 }
5335
5336 if (process_scoped) {
5337 /* Only targeting one process, we're done now. */
5338 break;
5339 }
5340 }
5341
5342 #if STACKSHOT_COLLECTS_LATENCY_INFO
5343 if (stackshot_ctx.sc_is_singlethreaded) {
5344 stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_task_iteration_latency_mt;
5345 } else {
5346 stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.task_queue_building_latency_mt;
5347 }
5348 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5349
5350 /* Setup post-task kcdata buffer */
5351 if (!stackshot_ctx.sc_is_singlethreaded) {
5352 stackshot_finalize_linked_kcdata();
5353 kcd_exit_on_error(stackshot_new_linked_kcdata());
5354 stackshot_ctx.sc_posttask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5355 }
5356
5357 #if CONFIG_COALITIONS
5358 /* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
5359 if (!collect_delta_stackshot || (last_task_start_time > stackshot_args.since_timestamp)) {
5360 int num_coalitions = 0;
5361 struct jetsam_coalition_snapshot *coalitions = NULL;
5362
5363 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5364 uint64_t coalition_begin_cpu_cycle_count = 0;
5365
5366 if (!stackshot_ctx.sc_panic_stackshot && (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
5367 coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5368 }
5369 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5370
5371 /* Iterate over coalitions */
5372 if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5373 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5374 stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5375 }
5376 }
5377 if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5378 if (num_coalitions > 0) {
5379 /* Variable size array - better not have it on the stack. */
5380 kcdata_compression_window_open(stackshot_kcdata_p);
5381 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
5382 coalitions = (struct jetsam_coalition_snapshot*)out_addr;
5383
5384 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5385 error = KERN_FAILURE;
5386 goto error_exit;
5387 }
5388
5389 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5390 }
5391 }
5392 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5393 if (!stackshot_ctx.sc_panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
5394 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
5395 "coalitions_cpu_cycle_count"));
5396 }
5397 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5398 }
5399 #else
5400 stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5401 #endif /* CONFIG_COALITIONS */
5402
5403 stackshot_panic_guard();
5404
5405 #if STACKSHOT_COLLECTS_LATENCY_INFO
5406 if (stackshot_ctx.sc_is_singlethreaded) {
5407 stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time();
5408 } else {
5409 stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time();
5410 }
5411 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5412
5413 /*
5414 * Iterate over the tasks in the terminated tasks list. We only inspect
5415 * tasks that have a valid bsd_info pointer. The check for task transition
5416 * like past P_LPEXIT during proc_exit() is now checked for inside the
5417 * kdp_stackshot_record_task(), and then a safer and minimal
5418 * transitioning_task_snapshot struct is collected via
5419 * kcdata_record_transitioning_task_snapshot()
5420 */
5421 queue_iterate(&terminated_tasks, task, task_t, tasks)
5422 {
5423 stackshot_panic_guard();
5424
5425 if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5426 (!queue_empty(&task->threads) && pid_from_task(task) != -1)) {
5427 tasks_in_stackshot++;
5428 threads_in_stackshot += task->thread_count;
5429 }
5430
5431 /* Only use workqueues on non-panic and non-scoped stackshots. */
5432 if (stackshot_ctx.sc_is_singlethreaded) {
5433 kcd_exit_on_error(kdp_stackshot_record_task(task));
5434 } else {
5435 kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5436 .sswi_task = task,
5437 .sswi_data = NULL,
5438 .sswi_idx = cur_workitem_index++
5439 }));
5440 }
5441 }
5442
5443 /* Mark the queue(s) as populated. */
5444 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5445 os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_populated, true, release);
5446 }
5447
5448 #if DEVELOPMENT || DEBUG
5449 kcd_exit_on_error(kdp_stackshot_plh_stats());
5450 #endif /* DEVELOPMENT || DEBUG */
5451
5452 #if STACKSHOT_COLLECTS_LATENCY_INFO
5453 if (stackshot_ctx.sc_is_singlethreaded) {
5454 stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt;
5455 } else {
5456 stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt;
5457 }
5458 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5459
5460 #if STACKSHOT_COLLECTS_LATENCY_INFO
5461 if (collect_latency_info) {
5462 stackshot_ctx.sc_latency.latency_version = 2;
5463 stackshot_ctx.sc_latency.main_cpu_number = stackshot_ctx.sc_main_cpuid;
5464 stackshot_ctx.sc_latency.calling_cpu_number = stackshot_ctx.sc_calling_cpuid;
5465 }
5466 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5467
5468 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5469 if (!stackshot_ctx.sc_panic_stackshot) {
5470 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
5471 "stackshot_total_cpu_cycle_cnt"));
5472 }
5473 #endif
5474
5475 kcdata_add_uint64_with_description(stackshot_kcdata_p, tasks_in_stackshot, "stackshot_tasks_count");
5476 kcdata_add_uint64_with_description(stackshot_kcdata_p, threads_in_stackshot, "stackshot_threads_count");
5477
5478 stackshot_panic_guard();
5479
5480 if (!stackshot_ctx.sc_is_singlethreaded) {
5481 /* Chip away at the queue. */
5482 stackshot_finalize_linked_kcdata();
5483 stackshot_cpu_do_work();
5484 *stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_tail->kcdata;
5485 }
5486
5487 #if CONFIG_EXCLAVES
5488 /* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
5489 if (stackshot_ctx.sc_panic_stackshot) {
5490 struct exclaves_panic_stackshot excl_ss;
5491 kdp_read_panic_exclaves_stackshot(&excl_ss);
5492
5493 if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
5494 tb_error_t tberr = TB_ERROR_SUCCESS;
5495 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
5496
5497 /* this block does not escape, so this is okay... */
5498 kern_return_t *error_in_block = &error;
5499 kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5500 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5501 tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
5502 *error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, false);
5503 });
5504 kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5505 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5506 if (tberr != TB_ERROR_SUCCESS) {
5507 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
5508 }
5509 } else {
5510 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
5511 }
5512
5513 /* check error from the block */
5514 kcd_exit_on_error(error);
5515 }
5516 #endif
5517
5518 /* === END of populating stackshot data === */
5519 error_exit:;
5520 if (error != KERN_SUCCESS) {
5521 stackshot_set_error(error);
5522 }
5523
5524 stackshot_panic_guard();
5525
5526 return error;
5527 }
5528
5529 static uint64_t
proc_was_throttled_from_task(task_t task)5530 proc_was_throttled_from_task(task_t task)
5531 {
5532 uint64_t was_throttled = 0;
5533 void *bsd_info = get_bsdtask_info(task);
5534
5535 if (bsd_info) {
5536 was_throttled = proc_was_throttled(bsd_info);
5537 }
5538
5539 return was_throttled;
5540 }
5541
5542 static uint64_t
proc_did_throttle_from_task(task_t task)5543 proc_did_throttle_from_task(task_t task)
5544 {
5545 uint64_t did_throttle = 0;
5546 void *bsd_info = get_bsdtask_info(task);
5547
5548 if (bsd_info) {
5549 did_throttle = proc_did_throttle(bsd_info);
5550 }
5551
5552 return did_throttle;
5553 }
5554
5555 static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot * memio_snap)5556 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
5557 {
5558 unsigned int pages_reclaimed;
5559 unsigned int pages_wanted;
5560 kern_return_t kErr;
5561
5562 uint64_t compressions = 0;
5563 uint64_t decompressions = 0;
5564
5565 compressions = counter_load(&vm_statistics_compressions);
5566 decompressions = counter_load(&vm_statistics_decompressions);
5567
5568 memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
5569 memio_snap->free_pages = vm_page_free_count;
5570 memio_snap->active_pages = vm_page_active_count;
5571 memio_snap->inactive_pages = vm_page_inactive_count;
5572 memio_snap->purgeable_pages = vm_page_purgeable_count;
5573 memio_snap->wired_pages = vm_page_wire_count;
5574 memio_snap->speculative_pages = vm_page_speculative_count;
5575 memio_snap->throttled_pages = vm_page_throttled_count;
5576 memio_snap->busy_buffer_count = count_busy_buffers();
5577 memio_snap->filebacked_pages = vm_page_pageable_external_count;
5578 memio_snap->compressions = (uint32_t)compressions;
5579 memio_snap->decompressions = (uint32_t)decompressions;
5580 memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
5581 kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
5582
5583 if (!kErr) {
5584 memio_snap->pages_wanted = (uint32_t)pages_wanted;
5585 memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
5586 memio_snap->pages_wanted_reclaimed_valid = 1;
5587 } else {
5588 memio_snap->pages_wanted = 0;
5589 memio_snap->pages_reclaimed = 0;
5590 memio_snap->pages_wanted_reclaimed_valid = 0;
5591 }
5592 }
5593
5594
5595 static vm_offset_t
stackshot_find_phys(vm_map_t map,vm_offset_t target_addr,kdp_fault_flags_t fault_flags,uint32_t * kdp_fault_result_flags)5596 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
5597 {
5598 vm_offset_t result;
5599 struct kdp_fault_result fault_results = {0};
5600 if (stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting) {
5601 fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
5602 }
5603 if (!stackshot_ctx.sc_panic_stackshot) {
5604 fault_flags |= KDP_FAULT_FLAGS_MULTICPU;
5605 }
5606
5607 result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
5608
5609 if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
5610 stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
5611
5612 #if STACKSHOT_COLLECTS_LATENCY_INFO
5613 stackshot_cpu_latency.faulting_time_mt += fault_results.time_spent_faulting;
5614 #endif
5615
5616 if ((stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting >= stackshot_max_fault_time) && !stackshot_ctx.sc_panic_stackshot) {
5617 stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
5618 }
5619 }
5620
5621 if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
5622 stackshot_cpu_ctx.scc_fault_stats.sfs_pages_faulted_in++;
5623 }
5624
5625 if (kdp_fault_result_flags) {
5626 *kdp_fault_result_flags = fault_results.flags;
5627 }
5628
5629 return result;
5630 }
5631
5632 /*
5633 * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
5634 * in order to:
5635 * 1. collect statistics on the number of pages faulted in
5636 * 2. stop faulting if the time spent faulting has exceeded the limit.
5637 */
5638 static boolean_t
stackshot_copyin(vm_map_t map,uint64_t uaddr,void * dest,size_t size,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5639 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5640 {
5641 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5642 if (try_fault) {
5643 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5644 }
5645 return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5646 }
5647 static boolean_t
stackshot_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5648 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5649 {
5650 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5651 if (try_fault) {
5652 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5653 }
5654 return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5655 }
5656 static int
stackshot_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5657 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5658 {
5659 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5660 if (try_fault) {
5661 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5662 }
5663 return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
5664 }
5665
5666 kern_return_t
do_stackshot(void * context)5667 do_stackshot(void *context)
5668 {
5669 #pragma unused(context)
5670 kern_return_t error;
5671 size_t queue_size;
5672 uint64_t abs_time = mach_absolute_time(), abs_time_end = 0;
5673 kdp_snapshot++;
5674
5675 _stackshot_validation_reset();
5676 error = stackshot_plh_setup(); /* set up port label hash */
5677
5678 if (!stackshot_ctx.sc_is_singlethreaded) {
5679 /* Set up queues. These numbers shouldn't change, but slightly fudge queue size just in case. */
5680 queue_size = FUDGED_SIZE(tasks_count + terminated_tasks_count, 10);
5681 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5682 stackshot_ctx.sc_workqueues[i] = (struct stackshot_workqueue) {
5683 .sswq_items = stackshot_alloc_arr(struct stackshot_workitem, queue_size, &error),
5684 .sswq_capacity = queue_size,
5685 .sswq_num_items = 0,
5686 .sswq_cur_item = 0,
5687 .sswq_populated = false
5688 };
5689 if (error != KERN_SUCCESS) {
5690 break;
5691 }
5692 }
5693 }
5694
5695 if (error != KERN_SUCCESS) {
5696 stackshot_set_error(error);
5697 return error;
5698 }
5699
5700 /*
5701 * If no main CPU has been selected at this point, (since every CPU has
5702 * called stackshot_cpu_preflight by now), then there was no CLPC
5703 * recommended P-core available. In that case, we should volunteer ourself
5704 * to be the main CPU, because someone has to do it.
5705 */
5706 if (stackshot_ctx.sc_main_cpuid == -1) {
5707 os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, cpu_number(), acquire);
5708 stackshot_cpu_ctx.scc_can_work = true;
5709 }
5710
5711 /* After this, auxiliary CPUs can begin work. */
5712 os_atomic_store(&stackshot_ctx.sc_state, SS_RUNNING, release);
5713
5714 /* If we are the main CPU, populate the queues / do other main CPU work. */
5715 if (stackshot_ctx.sc_panic_stackshot || (stackshot_ctx.sc_main_cpuid == cpu_number())) {
5716 stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5717 } else if (stackshot_cpu_ctx.scc_can_work) {
5718 stackshot_cpu_do_work();
5719 }
5720
5721 /* Wait for every CPU to finish. */
5722 #if STACKSHOT_COLLECTS_LATENCY_INFO
5723 stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time();
5724 #endif
5725 if (stackshot_cpu_ctx.scc_can_work) {
5726 os_atomic_dec(&stackshot_ctx.sc_cpus_working, seq_cst);
5727 stackshot_cpu_ctx.scc_can_work = false;
5728 }
5729 while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
5730 loop_wait();
5731 }
5732 stackshot_panic_guard();
5733 #if STACKSHOT_COLLECTS_LATENCY_INFO
5734 stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.cpu_wait_latency_mt;
5735 #endif
5736
5737 /* update timestamp of the stackshot */
5738 abs_time_end = mach_absolute_time();
5739 stackshot_ctx.sc_duration = (struct stackshot_duration_v2) {
5740 .stackshot_duration = (abs_time_end - abs_time),
5741 .stackshot_duration_outer = 0,
5742 .stackshot_duration_prior = stackshot_duration_prior_abs,
5743 };
5744
5745 stackshot_plh_reset();
5746
5747 /* Check interrupts disabled time. */
5748 #if SCHED_HYGIENE_DEBUG
5749 bool disable_interrupts_masked_check = kern_feature_override(
5750 KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
5751 (stackshot_flags & STACKSHOT_DO_COMPRESS) != 0;
5752
5753 #if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
5754 disable_interrupts_masked_check = true;
5755 #endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
5756
5757 if (disable_interrupts_masked_check) {
5758 ml_spin_debug_clear_self();
5759 }
5760
5761 if (!stackshot_ctx.sc_panic_stackshot && interrupt_masked_debug_mode) {
5762 /*
5763 * Try to catch instances where stackshot takes too long BEFORE returning from
5764 * the debugger
5765 */
5766 ml_handle_stackshot_interrupt_disabled_duration(current_thread());
5767 }
5768 #endif /* SCHED_HYGIENE_DEBUG */
5769
5770 kdp_snapshot--;
5771
5772 /* If any other CPU had an error, make sure we return it */
5773 if (stackshot_ctx.sc_retval == KERN_SUCCESS) {
5774 stackshot_ctx.sc_retval = stackshot_status_check();
5775 }
5776
5777 #if CONFIG_EXCLAVES
5778 /* Avoid setting AST until as late as possible, in case the stackshot fails */
5779 if (!stackshot_ctx.sc_panic_stackshot && stackshot_ctx.sc_retval == KERN_SUCCESS) {
5780 commit_exclaves_ast();
5781 }
5782 if (stackshot_ctx.sc_retval != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
5783 /* Clear inspection CTID list: no need to wait for these threads */
5784 stackshot_exclave_inspect_ctid_count = 0;
5785 stackshot_exclave_inspect_ctid_capacity = 0;
5786 stackshot_exclave_inspect_ctids = NULL;
5787 }
5788 #endif
5789
5790 /* If this is a singlethreaded stackshot, the "final" kcdata buffer is just our CPU's kcdata buffer */
5791 if (stackshot_ctx.sc_is_singlethreaded) {
5792 stackshot_ctx.sc_finalized_kcdata = stackshot_kcdata_p;
5793 }
5794
5795 return stackshot_ctx.sc_retval;
5796 }
5797
5798 kern_return_t
do_panic_stackshot(void * context)5799 do_panic_stackshot(void *context)
5800 {
5801 kern_return_t ret = do_stackshot(context);
5802 if (ret != KERN_SUCCESS) {
5803 goto out;
5804 }
5805
5806 ret = stackshot_finalize_singlethreaded_kcdata();
5807
5808 out:
5809 return ret;
5810 }
5811
5812 /*
5813 * Set up needed state for this CPU before participating in a stackshot.
5814 * Namely, we want to signal that we're available to do work.
5815 * Called while interrupts are disabled & in the debugger trap.
5816 */
5817 void
stackshot_cpu_preflight(void)5818 stackshot_cpu_preflight(void)
5819 {
5820 bool is_recommended, is_calling_cpu;
5821 int my_cpu_no = cpu_number();
5822
5823 #if STACKSHOT_COLLECTS_LATENCY_INFO
5824 stackshot_cpu_latency = (typeof(stackshot_cpu_latency)) {
5825 .cpu_number = cpu_number(),
5826 #if defined(__AMP__)
5827 .cluster_type = current_cpu_datap()->cpu_cluster_type,
5828 #else /* __AMP__ */
5829 .cluster_type = CLUSTER_TYPE_SMP,
5830 #endif /* __AMP__ */
5831 .faulting_time_mt = 0,
5832 .total_buf = 0,
5833 .intercluster_buf_used = 0
5834 };
5835 #if CONFIG_PERVASIVE_CPI
5836 mt_cur_cpu_cycles_instrs_speculative(&stackshot_cpu_latency.total_cycles, &stackshot_cpu_latency.total_instrs);
5837 #endif /* CONFIG_PERVASIVE_CPI */
5838 stackshot_cpu_latency.init_latency_mt = stackshot_cpu_latency.total_latency_mt = mach_absolute_time();
5839 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5840
5841 is_recommended = current_processor()->is_recommended;
5842
5843 /* If this is a recommended P-core (or SMP), try making it the main CPU */
5844 if (is_recommended
5845 #if defined(__AMP__)
5846 && current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_P
5847 #endif /* __AMP__ */
5848 ) {
5849 os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, my_cpu_no, acquire);
5850 }
5851
5852 is_calling_cpu = stackshot_ctx.sc_calling_cpuid == my_cpu_no;
5853
5854 stackshot_cpu_ctx.scc_did_work = false;
5855 stackshot_cpu_ctx.scc_can_work = is_calling_cpu || (is_recommended && !stackshot_ctx.sc_is_singlethreaded);
5856
5857 if (stackshot_cpu_ctx.scc_can_work) {
5858 os_atomic_inc(&stackshot_ctx.sc_cpus_working, relaxed);
5859 }
5860 }
5861
5862 __result_use_check
5863 static kern_return_t
stackshot_cpu_work_on_queue(struct stackshot_workqueue * queue)5864 stackshot_cpu_work_on_queue(struct stackshot_workqueue *queue)
5865 {
5866 struct stackshot_workitem *cur_workitemp;
5867 kern_return_t error = KERN_SUCCESS;
5868
5869 while (((cur_workitemp = stackshot_get_workitem(queue)) != NULL || !os_atomic_load(&queue->sswq_populated, acquire))) {
5870 /* Check to make sure someone hasn't errored out or panicked. */
5871 if (__improbable(stackshot_status_check() != KERN_SUCCESS)) {
5872 return KERN_ABORTED;
5873 }
5874
5875 if (cur_workitemp) {
5876 kcd_exit_on_error(stackshot_new_linked_kcdata());
5877 cur_workitemp->sswi_data = stackshot_cpu_ctx.scc_kcdata_head;
5878 kcd_exit_on_error(kdp_stackshot_record_task(cur_workitemp->sswi_task));
5879 stackshot_finalize_linked_kcdata();
5880 } else {
5881 #if STACKSHOT_COLLECTS_LATENCY_INFO
5882 uint64_t time_begin = mach_absolute_time();
5883 #endif
5884 loop_wait();
5885 #if STACKSHOT_COLLECTS_LATENCY_INFO
5886 stackshot_cpu_latency.workqueue_latency_mt += mach_absolute_time() - time_begin;
5887 #endif
5888 }
5889 }
5890
5891 error_exit:
5892 return error;
5893 }
5894
5895 static void
stackshot_cpu_do_work(void)5896 stackshot_cpu_do_work(void)
5897 {
5898 kern_return_t error;
5899
5900 stackshot_cpu_ctx.scc_stack_buffer = stackshot_alloc_arr(uintptr_t, MAX_FRAMES, &error);
5901 if (error != KERN_SUCCESS) {
5902 goto error_exit;
5903 }
5904
5905 #if STACKSHOT_COLLECTS_LATENCY_INFO
5906 stackshot_cpu_latency.init_latency_mt = mach_absolute_time() - stackshot_cpu_latency.init_latency_mt;
5907 #endif
5908
5909 bool high_perf = true;
5910
5911 #if defined(__AMP__)
5912 if (current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_E) {
5913 high_perf = false;
5914 }
5915 #endif /* __AMP__ */
5916
5917 if (high_perf) {
5918 /* Non-E cores: Work from most difficult to least difficult */
5919 for (size_t i = STACKSHOT_NUM_WORKQUEUES; i > 0; i--) {
5920 kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i - 1]));
5921 }
5922 } else {
5923 /* E: Work from least difficult to most difficult */
5924 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5925 kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i]));
5926 }
5927 }
5928 #if STACKSHOT_COLLECTS_LATENCY_INFO
5929 stackshot_cpu_latency.total_latency_mt = mach_absolute_time() - stackshot_cpu_latency.total_latency_mt;
5930 #if CONFIG_PERVASIVE_CPI
5931 uint64_t cycles, instrs;
5932 mt_cur_cpu_cycles_instrs_speculative(&cycles, &instrs);
5933 stackshot_cpu_latency.total_cycles = cycles - stackshot_cpu_latency.total_cycles;
5934 stackshot_cpu_latency.total_instrs = instrs - stackshot_cpu_latency.total_instrs;
5935 #endif /* CONFIG_PERVASIVE_CPI */
5936 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5937
5938 error_exit:
5939 if (error != KERN_SUCCESS) {
5940 stackshot_set_error(error);
5941 }
5942 stackshot_panic_guard();
5943 }
5944
5945 /*
5946 * This is where the other CPUs will end up when we take a stackshot.
5947 * If they're available to do work, they'll do so here.
5948 * Called with interrupts disabled & from the debugger trap.
5949 */
5950 void
stackshot_aux_cpu_entry(void)5951 stackshot_aux_cpu_entry(void)
5952 {
5953 /*
5954 * This is where the other CPUs will end up when we take a stackshot.
5955 * Also, the main CPU will call this in the middle of its work to chip
5956 * away at the queue.
5957 */
5958
5959 /* Don't do work if we said we couldn't... */
5960 if (!stackshot_cpu_ctx.scc_can_work) {
5961 return;
5962 }
5963
5964 /* Spin until we're ready to run. */
5965 while (os_atomic_load(&stackshot_ctx.sc_state, acquire) == SS_SETUP) {
5966 loop_wait();
5967 }
5968
5969 /* Check to make sure the setup didn't error out or panic. */
5970 if (stackshot_status_check() != KERN_SUCCESS) {
5971 goto exit;
5972 }
5973
5974 /* the CPU entering here is participating in the stackshot */
5975 stackshot_cpu_ctx.scc_did_work = true;
5976
5977 if (stackshot_ctx.sc_main_cpuid == cpu_number()) {
5978 stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
5979 } else {
5980 stackshot_cpu_do_work();
5981 }
5982
5983 exit:
5984 os_atomic_dec(&stackshot_ctx.sc_cpus_working, release);
5985 }
5986
5987 boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)5988 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
5989 {
5990 /* When the pthread kext puts a worker thread to sleep, it will
5991 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
5992 * struct. See parkit() in kern/kern_support.c in libpthread.
5993 */
5994 return (thread->state & TH_WAIT) &&
5995 (thread->block_hint == kThreadWaitParkedWorkQueue);
5996 }
5997
5998 #if CONFIG_COALITIONS
5999 static void
stackshot_coalition_jetsam_count(void * arg,int i,coalition_t coal)6000 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
6001 {
6002 #pragma unused(i, coal)
6003 unsigned int *coalition_count = (unsigned int*)arg;
6004 (*coalition_count)++;
6005 }
6006
6007 static void
stackshot_coalition_jetsam_snapshot(void * arg,int i,coalition_t coal)6008 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
6009 {
6010 if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
6011 return;
6012 }
6013
6014 struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
6015 struct jetsam_coalition_snapshot *jcs = &coalitions[i];
6016 task_t leader = TASK_NULL;
6017 jcs->jcs_id = coalition_id(coal);
6018 jcs->jcs_flags = 0;
6019 jcs->jcs_thread_group = 0;
6020
6021 if (coalition_term_requested(coal)) {
6022 jcs->jcs_flags |= kCoalitionTermRequested;
6023 }
6024 if (coalition_is_terminated(coal)) {
6025 jcs->jcs_flags |= kCoalitionTerminated;
6026 }
6027 if (coalition_is_reaped(coal)) {
6028 jcs->jcs_flags |= kCoalitionReaped;
6029 }
6030 if (coalition_is_privileged(coal)) {
6031 jcs->jcs_flags |= kCoalitionPrivileged;
6032 }
6033
6034 #if CONFIG_THREAD_GROUPS
6035 struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
6036 if (thread_group) {
6037 jcs->jcs_thread_group = thread_group_get_id(thread_group);
6038 }
6039 #endif /* CONFIG_THREAD_GROUPS */
6040
6041 leader = kdp_coalition_get_leader(coal);
6042 if (leader) {
6043 jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
6044 } else {
6045 jcs->jcs_leader_task_uniqueid = 0;
6046 }
6047 }
6048 #endif /* CONFIG_COALITIONS */
6049
6050 #if CONFIG_THREAD_GROUPS
/*
 * Thread-group iteration callback: bumps the unsigned counter that `arg`
 * points to once per invocation. The index and thread group are ignored.
 */
static void
stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
{
#pragma unused(i, tg)
	unsigned int *counter = arg;

	*counter += 1;
}
6058
6059 static void
stackshot_thread_group_snapshot(void * arg,int i,struct thread_group * tg)6060 stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
6061 {
6062 struct thread_group_snapshot_v3 *thread_groups = arg;
6063 struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
6064 const char *name = thread_group_get_name(tg);
6065 uint32_t flags = thread_group_get_flags(tg);
6066 tgs->tgs_id = thread_group_get_id(tg);
6067 static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
6068 kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
6069 kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
6070 sizeof(tgs->tgs_name_cont));
6071 tgs->tgs_flags =
6072 ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
6073 ((flags & THREAD_GROUP_FLAGS_APPLICATION) ? kThreadGroupApplication : 0) |
6074 ((flags & THREAD_GROUP_FLAGS_CRITICAL) ? kThreadGroupCritical : 0) |
6075 ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT) ? kThreadGroupBestEffort : 0) |
6076 ((flags & THREAD_GROUP_FLAGS_UI_APP) ? kThreadGroupUIApplication : 0) |
6077 ((flags & THREAD_GROUP_FLAGS_MANAGED) ? kThreadGroupManaged : 0) |
6078 ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers : 0) |
6079 0;
6080 }
6081 #endif /* CONFIG_THREAD_GROUPS */
6082
6083 /* Determine if a thread has waitinfo that stackshot can provide */
6084 static int
stackshot_thread_has_valid_waitinfo(thread_t thread)6085 stackshot_thread_has_valid_waitinfo(thread_t thread)
6086 {
6087 if (!(thread->state & TH_WAIT)) {
6088 return 0;
6089 }
6090
6091 switch (thread->block_hint) {
6092 // If set to None or is a parked work queue, ignore it
6093 case kThreadWaitParkedWorkQueue:
6094 case kThreadWaitNone:
6095 return 0;
6096 // There is a short window where the pthread kext removes a thread
6097 // from its ksyn wait queue before waking the thread up
6098 case kThreadWaitPThreadMutex:
6099 case kThreadWaitPThreadRWLockRead:
6100 case kThreadWaitPThreadRWLockWrite:
6101 case kThreadWaitPThreadCondVar:
6102 return kdp_pthread_get_thread_kwq(thread) != NULL;
6103 // All other cases are valid block hints if in a wait state
6104 default:
6105 return 1;
6106 }
6107 }
6108
6109 /* Determine if a thread has turnstileinfo that stackshot can provide */
6110 static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)6111 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
6112 {
6113 struct turnstile *ts = thread_get_waiting_turnstile(thread);
6114
6115 return stackshot_thread_has_valid_waitinfo(thread) &&
6116 ts != TURNSTILE_NULL;
6117 }
6118
6119 static void
stackshot_thread_turnstileinfo(thread_t thread,thread_turnstileinfo_v2_t * tsinfo)6120 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
6121 {
6122 struct turnstile *ts;
6123 struct ipc_service_port_label *ispl = NULL;
6124
6125 /* acquire turnstile information and store it in the stackshot */
6126 ts = thread_get_waiting_turnstile(thread);
6127 tsinfo->waiter = thread_tid(thread);
6128 kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
6129 tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
6130 (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
6131 (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
6132 STACKSHOT_PLH_LOOKUP_UNKNOWN);
6133 }
6134
6135 static void
stackshot_thread_wait_owner_info(thread_t thread,thread_waitinfo_v2_t * waitinfo)6136 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
6137 {
6138 thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
6139 struct ipc_service_port_label *ispl = NULL;
6140
6141 waitinfo->waiter = thread_tid(thread);
6142 waitinfo->wait_type = thread->block_hint;
6143 waitinfo->wait_flags = 0;
6144
6145 switch (waitinfo->wait_type) {
6146 case kThreadWaitKernelMutex:
6147 kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6148 break;
6149 case kThreadWaitPortReceive:
6150 kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6151 waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
6152 break;
6153 case kThreadWaitPortSend:
6154 kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
6155 waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
6156 break;
6157 case kThreadWaitSemaphore:
6158 kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6159 break;
6160 case kThreadWaitUserLock:
6161 kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6162 break;
6163 case kThreadWaitKernelRWLockRead:
6164 case kThreadWaitKernelRWLockWrite:
6165 case kThreadWaitKernelRWLockUpgrade:
6166 kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6167 break;
6168 case kThreadWaitPThreadMutex:
6169 case kThreadWaitPThreadRWLockRead:
6170 case kThreadWaitPThreadRWLockWrite:
6171 case kThreadWaitPThreadCondVar:
6172 kdp_pthread_find_owner(thread, waitinfo_v1);
6173 break;
6174 case kThreadWaitWorkloopSyncWait:
6175 kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
6176 break;
6177 case kThreadWaitOnProcess:
6178 kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
6179 break;
6180 case kThreadWaitSleepWithInheritor:
6181 kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6182 break;
6183 case kThreadWaitEventlink:
6184 kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6185 break;
6186 case kThreadWaitCompressor:
6187 kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
6188 break;
6189 #ifdef CONFIG_EXCLAVES
6190 case kThreadWaitExclaveCore:
6191 case kThreadWaitExclaveKit:
6192 kdp_esync_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
6193 break;
6194 #endif /* CONFIG_EXCLAVES */
6195 case kThreadWaitPageBusy:
6196 kdp_vm_page_sleep_find_owner(thread->wait_event, waitinfo_v1);
6197 break;
6198 case kThreadWaitPagingInProgress:
6199 case kThreadWaitPagingActivity:
6200 case kThreadWaitPagerInit:
6201 case kThreadWaitPagerReady:
6202 case kThreadWaitMemoryBlocked:
6203 case kThreadWaitPageInThrottle:
6204 kdp_vm_object_sleep_find_owner(thread->wait_event, waitinfo->wait_type, waitinfo_v1);
6205 break;
6206 default:
6207 waitinfo->owner = 0;
6208 waitinfo->context = 0;
6209 break;
6210 }
6211 }
6212