1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <mach/mach_types.h>
31 #include <mach/vm_param.h>
32 #include <mach/mach_vm.h>
33 #include <mach/clock_types.h>
34 #include <sys/code_signing.h>
35 #include <sys/errno.h>
36 #include <sys/stackshot.h>
37 #if defined(__arm64__)
38 #include <arm/cpu_internal.h>
39 #endif /* __arm64__ */
40 #ifdef IMPORTANCE_INHERITANCE
41 #include <ipc/ipc_importance.h>
42 #endif
43 #include <sys/appleapiopts.h>
44 #include <kern/debug.h>
45 #include <kern/block_hint.h>
46 #include <uuid/uuid.h>
47
48 #include <kdp/kdp_dyld.h>
49 #include <kdp/kdp_en_debugger.h>
50 #include <kdp/processor_core.h>
51 #include <kdp/kdp_common.h>
52
53 #include <libsa/types.h>
54 #include <libkern/version.h>
55 #include <libkern/section_keywords.h>
56
57 #include <string.h> /* bcopy */
58
59 #include <kern/kern_stackshot.h>
60 #include <kern/backtrace.h>
61 #include <kern/coalition.h>
62 #include <kern/epoch_sync.h>
63 #include <kern/exclaves_stackshot.h>
64 #include <kern/exclaves_inspection.h>
65 #include <kern/processor.h>
66 #include <kern/host_statistics.h>
67 #include <kern/counter.h>
68 #include <kern/thread.h>
69 #include <kern/thread_group.h>
70 #include <kern/task.h>
71 #include <kern/telemetry.h>
72 #include <kern/clock.h>
73 #include <kern/policy_internal.h>
74 #include <kern/socd_client.h>
75 #include <kern/startup.h>
76 #include <vm/vm_map_xnu.h>
77 #include <vm/vm_kern_xnu.h>
78 #include <vm/vm_pageout.h>
79 #include <vm/vm_fault.h>
80 #include <vm/vm_shared_region_xnu.h>
81 #include <vm/vm_compressor_xnu.h>
82 #include <libkern/OSKextLibPrivate.h>
83 #include <os/log.h>
84
85 #ifdef CONFIG_EXCLAVES
86 #include <kern/exclaves.tightbeam.h>
87 #endif /* CONFIG_EXCLAVES */
88
89 #include <kern/exclaves_test_stackshot.h>
90
91 #include <libkern/coreanalytics/coreanalytics.h>
92
93 #if defined(__x86_64__)
94 #include <i386/mp.h>
95 #include <i386/cpu_threads.h>
96 #endif
97
98 #include <pexpert/pexpert.h>
99
100 #if CONFIG_PERVASIVE_CPI
101 #include <kern/monotonic.h>
102 #endif /* CONFIG_PERVASIVE_CPI */
103
104 #include <san/kasan.h>
105
106 #if DEBUG || DEVELOPMENT
107 #define STACKSHOT_COLLECTS_DIAGNOSTICS 1
108 #define STACKSHOT_COLLECTS_LATENCY_INFO 1
109 #else
110 #define STACKSHOT_COLLECTS_DIAGNOSTICS 0
111 #define STACKSHOT_COLLECTS_LATENCY_INFO 0
112 #endif /* DEBUG || DEVELOPMENT */
113
114 #define STACKSHOT_COLLECTS_RDAR_126582377_DATA 0
115
116 #if defined(__AMP__)
117 #define STACKSHOT_NUM_WORKQUEUES 2
118 #else /* __AMP__ */
119 #define STACKSHOT_NUM_WORKQUEUES 1
120 #endif
121
122 #if defined(__arm64__)
123 #define STACKSHOT_NUM_BUFFERS MAX_CPU_CLUSTERS
124 #else /* __arm64__ */
125 #define STACKSHOT_NUM_BUFFERS 1
126 #endif /* __arm64__ */
127
128 /* The number of threads which will land a task in the hardest workqueue. */
129 #define STACKSHOT_HARDEST_THREADCOUNT 10
130
131 TUNABLE_DEV_WRITEABLE(unsigned int, stackshot_single_thread, "stackshot_single_thread", 0);
132
133 extern unsigned int not_in_kdp;
134
135 /* indicate to the compiler that some accesses are unaligned */
136 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
137
138 int kdp_snapshot = 0;
139
140 #pragma mark ---Stackshot Struct Definitions---
141
142 typedef struct linked_kcdata_descriptor {
143 struct kcdata_descriptor kcdata;
144 struct linked_kcdata_descriptor *next;
145 } * linked_kcdata_descriptor_t;
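/*
 * Each CPU grows a singly-linked chain of these descriptors as it works; the
 * chains are later woven into the single finalized kcdata buffer. See the
 * comment above linked_kcdata_alloc_callback below for how a chain grows.
 */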
146
147 struct stackshot_workitem {
148 task_t sswi_task;
149 linked_kcdata_descriptor_t sswi_data; /* The kcdata for this task. */
150 int sswi_idx; /* The index of this job, used for ordering kcdata across multiple queues. */
151 };
152
153 struct stackshot_workqueue {
154 uint32_t _Atomic sswq_num_items; /* Only modified by main CPU */
155 uint32_t _Atomic sswq_cur_item; /* Modified by all CPUs */
156 size_t sswq_capacity; /* Constant after preflight */
157 bool _Atomic sswq_populated; /* Only modified by main CPU */
158 struct stackshot_workitem *__counted_by(capacity) sswq_items;
159 };
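/*
 * Work is handed out lock-free: the main CPU appends items and publishes them
 * by incrementing sswq_num_items, while worker CPUs claim items by atomically
 * bumping sswq_cur_item (see stackshot_put_workitem / stackshot_get_workitem).
 */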
160
161 struct freelist_entry {
162 struct freelist_entry *fl_next; /* Next entry in the freelist */
163 size_t fl_size; /* Size of the entry (must be >= sizeof(struct freelist_entry)) */
164 };
165
166 struct stackshot_buffer {
167 void *ssb_ptr; /* Base of buffer */
168 size_t ssb_size;
169 size_t _Atomic ssb_used;
170 struct freelist_entry *ssb_freelist; /* First freelist entry */
171 int _Atomic ssb_freelist_lock;
172 size_t _Atomic ssb_overhead; /* Total amount ever freed (even if re-allocated from freelist) */
173 };
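/*
 * Allocation from this buffer is a simple atomic bump of ssb_used (see
 * stackshot_buffer_alloc); space released early is chained onto ssb_freelist
 * and handed back out as a last resort once the bump allocator runs out of
 * room (see stackshot_freelist_alloc).
 */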
174
175 struct kdp_snapshot_args {
176 int pid;
177 void *buffer;
178 struct kcdata_descriptor *descriptor;
179 uint32_t buffer_size;
180 uint64_t flags;
181 uint64_t since_timestamp;
182 uint32_t pagetable_mask;
183 };
184
185 /*
186 * Keep a simple cache of the most recent validation done at a page granularity
187 * to avoid the expensive software KVA-to-phys translation in the VM.
188 */
189
190 struct _stackshot_validation_state {
191 vm_offset_t last_valid_page_kva;
192 size_t last_valid_size;
193 };
194
195 /* CPU-local generation counts for PLH */
196 struct _stackshot_plh_gen_state {
197 	uint8_t  *pgs_gen;     /* last 'gen #' seen, per PLH index */
198 int16_t pgs_curgen_min; /* min idx seen for this gen */
199 int16_t pgs_curgen_max; /* max idx seen for this gen */
200 uint8_t pgs_curgen; /* current gen */
201 };
202
203 /*
204 * For port labels, we have a small hash table we use to track the
205 * struct ipc_service_port_label pointers we see along the way.
206 * This structure encapsulates the global state.
207 *
208 * The hash table is insert-only, similar to "intern"ing strings. It's
209  * only used and manipulated during the stackshot collection. We use
210  * separate chaining, with the hash elements and chains being int16_t
211 * indexes into the parallel arrays, with -1 ending the chain. Array indices are
212 * allocated using a bump allocator.
213 *
214 * The parallel arrays contain:
215 * - plh_array[idx] the pointer entered
216 * - plh_chains[idx] the hash chain
217 * - plh_gen[idx] the last 'generation #' seen
218 *
219 * Generation IDs are used to track entries looked up in the current
220 * task; 0 is never used, and the plh_gen array is cleared to 0 on
221 * rollover.
222 *
223 * The portlabel_ids we report externally are just the index in the array,
224 * plus 1 to avoid 0 as a value. 0 is NONE, -1 is UNKNOWN (e.g. there is
225 * one, but we ran out of space)
226 */
227 struct port_label_hash {
228 int _Atomic plh_lock; /* lock for concurrent modifications to this plh */
229 uint16_t plh_size; /* size of allocations; 0 disables tracking */
230 uint16_t plh_count; /* count of used entries in plh_array */
231 struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
232 int16_t *plh_chains; /* _size allocated */
233 int16_t *plh_hash; /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
234 #if DEVELOPMENT || DEBUG
235 /* statistics */
236 uint32_t _Atomic plh_lookups; /* # lookups or inserts */
237 uint32_t _Atomic plh_found;
238 uint32_t _Atomic plh_found_depth;
239 uint32_t _Atomic plh_insert;
240 uint32_t _Atomic plh_insert_depth;
241 uint32_t _Atomic plh_bad;
242 uint32_t _Atomic plh_bad_depth;
243 uint32_t _Atomic plh_lookup_send;
244 uint32_t _Atomic plh_lookup_receive;
245 #define PLH_STAT_OP(...) (void)(__VA_ARGS__)
246 #else /* DEVELOPMENT || DEBUG */
247 #define PLH_STAT_OP(...) (void)(0)
248 #endif /* DEVELOPMENT || DEBUG */
249 };
250
251 #define plh_lock(plh) while(!os_atomic_cmpxchg(&(plh)->plh_lock, 0, 1, acquire)) { loop_wait(); }
252 #define plh_unlock(plh) os_atomic_store(&(plh)->plh_lock, 0, release);
253
254 #define STACKSHOT_PLH_SHIFT 7
255 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
256 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
257 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
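/*
 * Illustrative example: the first ipc_service_port_label pointer interned
 * lands at plh_array[0] and is reported externally as portlabel_id 1; 0 means
 * no label, and -1 means a label exists but could not be tracked (e.g. the
 * table filled up).
 */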
258
259 struct stackshot_cpu_context {
260 bool scc_can_work; /* Whether the CPU can do more stackshot work */
261 bool scc_did_work; /* Whether the CPU actually did any stackshot work */
262 	linked_kcdata_descriptor_t scc_kcdata_head; /* See `linked_kcdata_alloc_callback` */
263 	linked_kcdata_descriptor_t scc_kcdata_tail; /* See `linked_kcdata_alloc_callback` */
264 uintptr_t *scc_stack_buffer; /* A buffer for stacktraces. */
265 struct stackshot_fault_stats scc_fault_stats;
266 struct _stackshot_validation_state scc_validation_state;
267 struct _stackshot_plh_gen_state scc_plh_gen;
268 };
269
270 /*
271 * When directly modifying the stackshot state, always use the macros below to
272  * work with this enum - the higher-order bits are used to store an error code
273 * in the case of SS_ERRORED.
274 *
275 * +------------------------------------+-------------------+
276 * | | |
277 * v | |
278 * +-------------+ +----------+ +------------+ +------------+
279 * | SS_INACTIVE |---->| SS_SETUP |---->| SS_RUNNING |---->| SS_ERRORED |
280 * +-------------+ +----------+ +------------+ +------------+
281 * | | | ^ |
282 * | +----------------|----------------+ |
283 * +-------------+ | | |
284 * | SS_PANICKED |<--------+-------------------+ |
285 * +-------------+ |
286 * ^ |
287 * | |
288 * +--------------------------------------------------------+
289 */
290 __enum_closed_decl(stackshot_state_t, uint, {
291 SS_INACTIVE = 0x0, /* -> SS_SETUP */
292 SS_SETUP = 0x1, /* -> SS_RUNNING, SS_ERRORED, SS_PANICKED */
293 SS_RUNNING = 0x2, /* -> SS_ERRORED, SS_PANICKED, SS_INACTIVE */
294 SS_ERRORED = 0x3, /* -> SS_INACTIVE, SS_PANICKED */
295 SS_PANICKED = 0x4, /* -> N/A */
296 _SS_COUNT
297 });
298
299 static_assert(_SS_COUNT <= 0x5);
300 /* Get the stackshot state ID from a stackshot_state_t. */
301 #define SS_STATE(state) ((state) & 0x7u)
302 /* Get the error code from a stackshot_state_t. */
303 #define SS_ERRCODE(state) ((state) >> 3)
304 /* Make a stackshot error state with a given code. */
305 #define SS_MKERR(code) (((code) << 3) | SS_ERRORED)
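/*
 * For example, SS_MKERR(KERN_INSUFFICIENT_BUFFER_SIZE) produces a value whose
 * SS_STATE() is SS_ERRORED and whose SS_ERRCODE() recovers
 * KERN_INSUFFICIENT_BUFFER_SIZE.
 */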
306
307 struct stackshot_context {
308 /* Constants & Arguments */
309 struct kdp_snapshot_args sc_args;
310 int sc_calling_cpuid;
311 int sc_main_cpuid;
312 bool sc_enable_faulting;
313 uint64_t sc_microsecs; /* Timestamp */
314 bool sc_panic_stackshot;
315 size_t sc_min_kcdata_size;
316 bool sc_is_singlethreaded;
317
318 /* State & Errors */
319 stackshot_state_t _Atomic sc_state; /* Only modified by calling CPU, main CPU, or panicking CPU. See comment above type definition for details. */
320 kern_return_t sc_retval; /* The return value of the main thread */
321 uint32_t _Atomic sc_cpus_working;
322
323 /* KCData */
324 linked_kcdata_descriptor_t sc_pretask_kcdata;
325 linked_kcdata_descriptor_t sc_posttask_kcdata;
326 kcdata_descriptor_t sc_finalized_kcdata;
327
328 /* Buffers & Queues */
329 struct stackshot_buffer __counted_by(num_buffers) sc_buffers[STACKSHOT_NUM_BUFFERS];
330 size_t sc_num_buffers;
331 struct stackshot_workqueue __counted_by(STACKSHOT_NUM_WORKQUEUES) sc_workqueues[STACKSHOT_NUM_WORKQUEUES];
332 struct port_label_hash sc_plh;
333
334 /* Statistics */
335 struct stackshot_duration_v2 sc_duration;
336 uint32_t sc_bytes_traced;
337 uint32_t sc_bytes_uncompressed;
338 #if STACKSHOT_COLLECTS_LATENCY_INFO
339 struct stackshot_latency_collection_v2 sc_latency;
340 #endif
341 };
342
343 #define STACKSHOT_DEBUG_TRACEBUF_SIZE 16
344
345 struct stackshot_trace_entry {
346 int sste_line_no;
347 uint64_t sste_timestamp;
348 mach_vm_address_t sste_data;
349 };
350
351 struct stackshot_trace_buffer {
352 uint64_t sstb_last_trace_timestamp;
353 size_t sstb_tail_idx;
354 size_t sstb_size;
355 struct stackshot_trace_entry __counted_by(STACKSHOT_DEBUG_TRACEBUF_SIZE) sstb_entries[STACKSHOT_DEBUG_TRACEBUF_SIZE];
356 };
357
358 #pragma mark ---Stackshot State and Data---
359
360 /*
361 * Two stackshot states, one for panic and one for normal.
362 * That way, we can take a stackshot during a panic without clobbering state.
363 */
364 #define STACKSHOT_CTX_IDX_NORMAL 0
365 #define STACKSHOT_CTX_IDX_PANIC 1
366 size_t cur_stackshot_ctx_idx = STACKSHOT_CTX_IDX_NORMAL;
367 struct stackshot_context stackshot_contexts[2] = {{0}, {0}};
368 #define stackshot_ctx (stackshot_contexts[cur_stackshot_ctx_idx])
369 #define stackshot_args (stackshot_ctx.sc_args)
370 #define stackshot_flags (stackshot_args.flags)
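/*
 * All of the code below reaches the active context through these macros, so
 * switching cur_stackshot_ctx_idx to STACKSHOT_CTX_IDX_PANIC redirects every
 * access to the dedicated panic-time context without clobbering the state of
 * an in-flight normal stackshot.
 */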
371
372 static struct {
373 uint64_t last_abs_start; /* start time of last stackshot */
374 uint64_t last_abs_end; /* end time of last stackshot */
375 uint64_t stackshots_taken; /* total stackshots taken since boot */
376 uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
377 } stackshot_stats = { 0 };
378
379 #if STACKSHOT_COLLECTS_LATENCY_INFO
380 static struct stackshot_latency_cpu PERCPU_DATA(stackshot_cpu_latency_percpu);
381 #define stackshot_cpu_latency (*PERCPU_GET(stackshot_cpu_latency_percpu))
382 #endif
383
384 static struct stackshot_cpu_context PERCPU_DATA(stackshot_cpu_ctx_percpu);
385 #define stackshot_cpu_ctx (*PERCPU_GET(stackshot_cpu_ctx_percpu))
386
387 static struct kcdata_descriptor PERCPU_DATA(stackshot_kcdata_percpu);
388 #define stackshot_kcdata_p (PERCPU_GET(stackshot_kcdata_percpu))
389
390 #if STACKSHOT_COLLECTS_LATENCY_INFO
391 static bool collect_latency_info = true;
392 #endif
393
394 static uint64_t stackshot_max_fault_time;
395
396 #if STACKSHOT_COLLECTS_DIAGNOSTICS
397 static struct stackshot_trace_buffer PERCPU_DATA(stackshot_trace_buffer);
398 #endif
399
400 #pragma mark ---Stackshot Global State---
401
402 uint32_t stackshot_estimate_adj = 25; /* experiment factor: 0-100, adjust our estimate up by this amount */
403
404 static uint32_t stackshot_initial_estimate;
405 static uint32_t stackshot_initial_estimate_adj;
406 static uint64_t stackshot_duration_prior_abs; /* prior attempts, abs */
407 static unaligned_u64 * stackshot_duration_outer;
408 static uint64_t stackshot_tries;
409
410 void * kernel_stackshot_buf = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
411 int kernel_stackshot_buf_size = 0;
412
413 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
414
415 #if CONFIG_EXCLAVES
416 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
417 static size_t stackshot_exclave_inspect_ctid_count = 0;
418 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
419
420 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
421 #endif /* CONFIG_EXCLAVES */
422
423 #if DEBUG || DEVELOPMENT
424 TUNABLE(bool, disable_exclave_stackshot, "-disable_exclave_stackshot", false);
425 #else
426 const bool disable_exclave_stackshot = false;
427 #endif
428
429 #pragma mark ---Stackshot Static Function Declarations---
430
431 __private_extern__ void stackshot_init( void );
432 static boolean_t memory_iszero(void *addr, size_t size);
433 static void stackshot_cpu_do_work(void);
434 static kern_return_t stackshot_finalize_kcdata(void);
435 static kern_return_t stackshot_finalize_singlethreaded_kcdata(void);
436 static kern_return_t stackshot_collect_kcdata(void);
437 static int kdp_stackshot_kcdata_format();
438 static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
439 static vm_offset_t stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
440 static boolean_t stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
441 static int stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
442 static boolean_t stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
443 static uint64_t proc_was_throttled_from_task(task_t task);
444 static void stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
445 static int stackshot_thread_has_valid_waitinfo(thread_t thread);
446 static void stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
447 static int stackshot_thread_has_valid_turnstileinfo(thread_t thread);
448 static uint32_t get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj, uint64_t trace_flags, pid_t target_pid);
449 static kern_return_t kdp_snapshot_preflight_internal(struct kdp_snapshot_args args);
450
451 #if CONFIG_COALITIONS
452 static void stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
453 static void stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
454 #endif /* CONFIG_COALITIONS */
455
456 #if CONFIG_THREAD_GROUPS
457 static void stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
458 static void stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
459 #endif /* CONFIG_THREAD_GROUPS */
460
461 extern uint64_t workqueue_get_task_ss_flags_from_pwq_state_kdp(void *proc);
462
463 static kcdata_descriptor_t linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size);
464
465 #pragma mark ---Stackshot Externs---
466
467 struct proc;
468 extern int proc_pid(struct proc *p);
469 extern uint64_t proc_uniqueid(void *p);
470 extern uint64_t proc_was_throttled(void *p);
471 extern uint64_t proc_did_throttle(void *p);
472 extern int proc_exiting(void *p);
473 extern int proc_in_teardown(void *p);
474 static uint64_t proc_did_throttle_from_task(task_t task);
475 extern void proc_name_kdp(struct proc *p, char * buf, int size);
476 extern int proc_threadname_kdp(void * uth, char * buf, size_t size);
477 extern void proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
478 extern void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
479 extern uint64_t proc_getcsflags_kdp(void * p);
480 extern boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
481 extern int memorystatus_get_pressure_status_kdp(void);
482 extern void memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
483 extern void panic_stackshot_release_lock(void);
484
485 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
486
487 #if CONFIG_TELEMETRY
488 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
489 #endif /* CONFIG_TELEMETRY */
490
491 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
492 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
493
494 static size_t stackshot_plh_est_size(void);
495
496 #if CONFIG_EXCLAVES
497 static kern_return_t collect_exclave_threads(uint64_t);
498 static kern_return_t stackshot_setup_exclave_waitlist(void);
499 #endif
500
501 /*
502 * Validates that the given address for a word is both a valid page and has
503 * default caching attributes for the current map.
504 */
505 bool machine_trace_thread_validate_kva(vm_offset_t);
506 /*
507 * Validates a region that stackshot will potentially inspect.
508 */
509 static bool _stackshot_validate_kva(vm_offset_t, size_t);
510 /*
511 * Must be called whenever stackshot is re-driven.
512 */
513 static void _stackshot_validation_reset(void);
514 /*
515 * A kdp-safe strlen() call. Returns:
516 * -1 if we reach maxlen or a bad address before the end of the string, or
517 * strlen(s)
518 */
519 static long _stackshot_strlen(const char *s, size_t maxlen);
520
521 #define MAX_FRAMES 1000
522 #define STACKSHOT_PAGETABLE_BUFSZ 4000
523 #define MAX_LOADINFOS 500
524 #define MAX_DYLD_COMPACTINFO (20 * 1024) // max bytes of compactinfo to include per proc/shared region
525 #define TASK_IMP_WALK_LIMIT 20
526
527 typedef struct thread_snapshot *thread_snapshot_t;
528 typedef struct task_snapshot *task_snapshot_t;
529
530 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
531 extern kdp_send_t kdp_en_send_pkt;
532 #endif
533
534 /*
535 * Stackshot locking and other defines.
536 */
537 LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
538 LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
539
540 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
541 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
542 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
543 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
544
545 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
546 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
547
548 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
549 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
550
551 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
552
553 /*
554 * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
555 * for non-panic stackshots where faulting is requested.
556 */
557 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
558
559
560 #ifndef ROUNDUP
561 #define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y))
562 #endif
563
564 #define STACKSHOT_QUEUE_LABEL_MAXSIZE 64
565
566 #pragma mark ---Stackshot Useful Macros---
567
568 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
569 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
570 /*
571 * Use of the kcd_exit_on_error(action) macro requires a local
572 * 'kern_return_t error' variable and 'error_exit' label.
573 */
574 #define kcd_exit_on_error(action) \
575 do { \
576 if (KERN_SUCCESS != (error = (action))) { \
577 STACKSHOT_TRACE(error); \
578 if (error == KERN_RESOURCE_SHORTAGE) { \
579 error = KERN_INSUFFICIENT_BUFFER_SIZE; \
580 } \
581 goto error_exit; \
582 } \
583 } while (0); /* end kcd_exit_on_error */
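/*
 * Illustrative usage sketch (kcdata_get_memory_addr here is just a stand-in
 * for any kcdata call that returns a kern_return_t):
 *
 *	kern_return_t error = KERN_SUCCESS;
 *	mach_vm_address_t out_addr = 0;
 *	...
 *	kcd_exit_on_error(kcdata_get_memory_addr(kcd, type, size, &out_addr));
 *	...
 * error_exit:
 *	return error;
 */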
584
585 #if defined(__arm64__)
586 #define loop_wait_noguard() __builtin_arm_wfe()
587 #elif defined(__x86_64__)
588 #define loop_wait_noguard() __builtin_ia32_pause()
589 #else
590 #define loop_wait_noguard()
591 #endif /* __x86_64__ */
592
593 #define loop_wait() { loop_wait_noguard(); stackshot_panic_guard(); }
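/*
 * loop_wait() re-checks for a concurrent panic on every spin iteration;
 * loop_wait_noguard() is reserved for paths that must keep spinning even once
 * a panic has been signalled (e.g. stackshot_panic_spin itself).
 */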
594
595 static inline void stackshot_panic_guard(void);
596
597 static __attribute__((noreturn, noinline)) void
598 stackshot_panic_spin(void)
599 {
600 if (stackshot_cpu_ctx.scc_can_work) {
601 stackshot_cpu_ctx.scc_can_work = false;
602 os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
603 }
604 if (stackshot_ctx.sc_calling_cpuid == cpu_number()) {
605 while (os_atomic_load(&stackshot_ctx.sc_cpus_working, acquire) != 0) {
606 loop_wait_noguard();
607 }
608 panic_stackshot_release_lock();
609 }
610 while (1) {
611 loop_wait_noguard();
612 }
613 }
614
615 /**
616 * Immediately aborts if another CPU panicked during the stackshot.
617 */
618 static inline void
619 stackshot_panic_guard(void)
620 {
621 if (__improbable(os_atomic_load(&stackshot_ctx.sc_state, relaxed) == SS_PANICKED)) {
622 stackshot_panic_spin();
623 }
624 }
625
626 /*
627 * Signal that we panicked during a stackshot by setting an atomic flag and
628 * waiting for others to coalesce before continuing the panic. Other CPUs will
629 * spin on this as soon as they see it set in order to prevent multiple
630 * concurrent panics. The calling CPU (i.e. the one holding the debugger lock)
631 * will release it for us in `stackshot_panic_spin` so we can continue
632 * panicking.
633 *
634 * This is called from panic_trap_to_debugger.
635 */
636 void
637 stackshot_cpu_signal_panic(void)
638 {
639 stackshot_state_t o_state;
640 if (stackshot_active()) {
641 /* Check if someone else panicked before we did. */
642 o_state = os_atomic_xchg(&stackshot_ctx.sc_state, SS_PANICKED, seq_cst);
643 if (o_state == SS_PANICKED) {
644 stackshot_panic_spin();
645 }
646
647 /* We're the first CPU to panic - wait for everyone to coalesce. */
648 if (stackshot_cpu_ctx.scc_can_work) {
649 stackshot_cpu_ctx.scc_can_work = false;
650 os_atomic_dec(&stackshot_ctx.sc_cpus_working, acquire);
651 }
652 while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
653 loop_wait_noguard();
654 }
655 }
656 }
657
658 /*
659 * Sets the stackshot state to SS_ERRORED along with the error code.
660 * Only works if the current state is SS_RUNNING or SS_SETUP.
661 */
662 static inline void
663 stackshot_set_error(kern_return_t error)
664 {
665 stackshot_state_t cur_state;
666 stackshot_state_t err_state = SS_MKERR(error);
667 if (__improbable(!os_atomic_cmpxchgv(&stackshot_ctx.sc_state, SS_RUNNING, err_state, &cur_state, seq_cst))) {
668 if (cur_state == SS_SETUP) {
669 os_atomic_cmpxchg(&stackshot_ctx.sc_state, SS_SETUP, err_state, seq_cst);
670 } else {
671 /* Our state is something other than SS_RUNNING or SS_SETUP... Check for panic. */
672 stackshot_panic_guard();
673 }
674 }
675 }
676
677 /* Returns an error code if the current stackshot context has errored out.
678 * Also functions as a panic guard.
679 */
680 __result_use_check
681 static inline kern_return_t
682 stackshot_status_check(void)
683 {
684 stackshot_state_t state = os_atomic_load(&stackshot_ctx.sc_state, relaxed);
685
686 /* Check for panic */
687 if (__improbable(SS_STATE(state) == SS_PANICKED)) {
688 stackshot_panic_spin();
689 }
690
691 /* Check for error */
692 if (__improbable(SS_STATE(state) == SS_ERRORED)) {
693 kern_return_t err = SS_ERRCODE(state);
694 assert(err != KERN_SUCCESS); /* SS_ERRORED should always store an associated error code. */
695 return err;
696 }
697
698 return KERN_SUCCESS;
699 }
700
701 #pragma mark ---Stackshot Tracing---
702
703 #if STACKSHOT_COLLECTS_DIAGNOSTICS
704 static void
705 stackshot_trace(int line_no, mach_vm_address_t data)
706 {
707 struct stackshot_trace_buffer *buffer = PERCPU_GET(stackshot_trace_buffer);
708 buffer->sstb_entries[buffer->sstb_tail_idx] = (struct stackshot_trace_entry) {
709 .sste_line_no = line_no,
710 .sste_timestamp = mach_continuous_time(),
711 .sste_data = data
712 };
713 buffer->sstb_tail_idx = (buffer->sstb_tail_idx + 1) % STACKSHOT_DEBUG_TRACEBUF_SIZE;
714 buffer->sstb_size = MIN(buffer->sstb_size + 1, STACKSHOT_DEBUG_TRACEBUF_SIZE);
715 }
716 #define STACKSHOT_TRACE(data) stackshot_trace(__LINE__, (mach_vm_address_t) (data))
717
718 #else /* STACKSHOT_COLLECTS_DIAGNOSTICS */
719 #define STACKSHOT_TRACE(data) ((void) data)
720 #endif /* !STACKSHOT_COLLECTS_DIAGNOSTICS */
721
722 #pragma mark ---Stackshot Buffer Management---
723
724 #define freelist_lock(buffer) while(!os_atomic_cmpxchg(&buffer->ssb_freelist_lock, 0, 1, acquire)) { loop_wait(); }
725 #define freelist_unlock(buffer) os_atomic_store(&buffer->ssb_freelist_lock, 0, release);
726
727 /**
728 * Allocates some data from the shared stackshot buffer freelist.
729  * This should not be used directly; it is a last resort if we run out of space.
730 */
731 static void *
732 stackshot_freelist_alloc(
733 size_t size,
734 struct stackshot_buffer *buffer,
735 kern_return_t *error)
736 {
737 struct freelist_entry **cur_freelist, **best_freelist = NULL, *ret = NULL;
738
739 freelist_lock(buffer);
740
741 cur_freelist = &buffer->ssb_freelist;
742
743 while (*cur_freelist != NULL) {
744 if (((*cur_freelist)->fl_size >= size) && ((best_freelist == NULL) || ((*best_freelist)->fl_size > (*cur_freelist)->fl_size))) {
745 best_freelist = cur_freelist;
746 if ((*best_freelist)->fl_size == size) {
747 break;
748 }
749 }
750 cur_freelist = &((*cur_freelist)->fl_next);
751 }
752
753 /* If we found a freelist entry, update the freelist */
754 if (best_freelist != NULL) {
755 os_atomic_sub(&buffer->ssb_overhead, size, relaxed);
756 ret = *best_freelist;
757
758 /* If there's enough unused space at the end of this entry, we should make a new one */
759 if (((*best_freelist)->fl_size - size) > sizeof(struct freelist_entry)) {
760 struct freelist_entry *new_freelist = (struct freelist_entry*) ((mach_vm_address_t) *best_freelist + size);
761 *new_freelist = (struct freelist_entry) {
762 .fl_next = (*best_freelist)->fl_next,
763 .fl_size = (*best_freelist)->fl_size - size
764 };
765 (*best_freelist)->fl_next = new_freelist;
766 }
767
768 /* Update previous entry with next or new entry */
769 *best_freelist = (*best_freelist)->fl_next;
770 }
771
772 freelist_unlock(buffer);
773
774 if (error != NULL) {
775 if (ret == NULL) {
776 *error = KERN_INSUFFICIENT_BUFFER_SIZE;
777 } else {
778 *error = KERN_SUCCESS;
779 }
780 }
781
782 return ret;
783 }
784
785 /**
786 * Allocates some data from the shared stackshot buffer.
787 * Should not be used directly - see the `stackshot_alloc` and
788 * `stackshot_alloc_arr` macros.
789 */
790 static void *
791 stackshot_buffer_alloc(
792 size_t size,
793 struct stackshot_buffer *buffer,
794 kern_return_t *error)
795 {
796 size_t o_used, new_used;
797
798 stackshot_panic_guard();
799 assert(!stackshot_ctx.sc_is_singlethreaded);
800
801 os_atomic_rmw_loop(&buffer->ssb_used, o_used, new_used, relaxed, {
802 new_used = o_used + size;
803 if (new_used > buffer->ssb_size) {
804 os_atomic_rmw_loop_give_up(return stackshot_freelist_alloc(size, buffer, error));
805 }
806 });
807
808 if (error != NULL) {
809 *error = KERN_SUCCESS;
810 }
811
812 return (void*) ((mach_vm_address_t) buffer->ssb_ptr + o_used);
813 }
814
815 /**
816 * Finds the best stackshot buffer to use (prefer our cluster's buffer)
817 * and allocates from it.
818 * Should not be used directly - see the `stackshot_alloc` and
819 * `stackshot_alloc_arr` macros.
820 */
821 __result_use_check
822 static void *
823 stackshot_best_buffer_alloc(size_t size, kern_return_t *error)
824 {
825 #if defined(__AMP__)
826 kern_return_t err;
827 int my_cluster;
828 void *ret = NULL;
829 #endif /* __AMP__ */
830
831 #if STACKSHOT_COLLECTS_LATENCY_INFO
832 stackshot_cpu_latency.total_buf += size;
833 #endif
834
835 #if defined(__AMP__)
836 /* First, try our cluster's buffer */
837 my_cluster = cpu_cluster_id();
838 ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[my_cluster], &err);
839
840 /* Try other buffers now. */
841 if (err != KERN_SUCCESS) {
842 for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
843 if (buf_idx == my_cluster) {
844 continue;
845 }
846
847 ret = stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[buf_idx], &err);
848 if (err == KERN_SUCCESS) {
849 #if STACKSHOT_COLLECTS_LATENCY_INFO
850 stackshot_cpu_latency.intercluster_buf_used += size;
851 #endif
852 break;
853 }
854 }
855 }
856
857 if (error != NULL) {
858 *error = err;
859 }
860
861 return ret;
862 #else /* __AMP__ */
863 return stackshot_buffer_alloc(size, &stackshot_ctx.sc_buffers[0], error);
864 #endif /* !__AMP__ */
865 }
866
867 /**
868 * Frees some data from the shared stackshot buffer and adds it to the freelist.
869 */
870 static void
871 stackshot_buffer_free(
872 void *ptr,
873 struct stackshot_buffer *buffer,
874 size_t size)
875 {
876 stackshot_panic_guard();
877
878 /* This should never be called during a singlethreaded stackshot. */
879 assert(!stackshot_ctx.sc_is_singlethreaded);
880
881 os_atomic_add(&buffer->ssb_overhead, size, relaxed);
882
883 /* Make sure we have enough space for the freelist entry */
884 if (size < sizeof(struct freelist_entry)) {
885 return;
886 }
887
888 freelist_lock(buffer);
889
890 /* Create new freelist entry and push it to the front of the list */
891 *((struct freelist_entry*) ptr) = (struct freelist_entry) {
892 .fl_size = size,
893 .fl_next = buffer->ssb_freelist
894 };
895 buffer->ssb_freelist = ptr;
896
897 freelist_unlock(buffer);
898 }
899
900 /**
901 * Allocates some data from the stackshot buffer. Uses the bump allocator in
902 * multithreaded mode and endalloc in singlethreaded.
903 * err must ALWAYS be nonnull.
904 * Should not be used directly - see the macros in kern_stackshot.h.
905 */
906 void *
907 stackshot_alloc_with_size(size_t size, kern_return_t *err)
908 {
909 void *ptr;
910 assert(err != NULL);
911 assert(stackshot_active());
912
913 stackshot_panic_guard();
914
915 if (stackshot_ctx.sc_is_singlethreaded) {
916 ptr = kcdata_endalloc(stackshot_kcdata_p, size);
917 if (ptr == NULL) {
918 *err = KERN_INSUFFICIENT_BUFFER_SIZE;
919 }
920 } else {
921 ptr = stackshot_best_buffer_alloc(size, err);
922 if (ptr == NULL) {
923 /* We should always return an error if we return a null ptr */
924 assert3u(*err, !=, KERN_SUCCESS);
925 }
926 }
927
928 return ptr;
929 }
930
931 /**
932 * Initializes a new kcdata buffer somewhere in a linked kcdata list.
933 * Allocates a buffer for the kcdata from the shared stackshot buffer.
934 *
935 * See `linked_kcdata_alloc_callback` for the implementation details of
936 * linked kcdata for stackshot.
937 */
938 __result_use_check
939 static kern_return_t
940 linked_kcdata_init(
941 linked_kcdata_descriptor_t descriptor,
942 size_t min_size,
943 unsigned int data_type,
944 unsigned int flags)
945 {
946 void *buf_ptr;
947 kern_return_t error;
948 size_t buf_size = MAX(min_size, stackshot_ctx.sc_min_kcdata_size);
949
950 buf_ptr = stackshot_alloc_arr(uint8_t, buf_size, &error);
951 if (error != KERN_SUCCESS) {
952 return error;
953 }
954
955 error = kcdata_memory_static_init(&descriptor->kcdata, (mach_vm_address_t) buf_ptr, data_type, buf_size, flags);
956 if (error != KERN_SUCCESS) {
957 return error;
958 }
959
960 descriptor->kcdata.kcd_alloc_callback = linked_kcdata_alloc_callback;
961
962 return KERN_SUCCESS;
963 }
964
965 static void
966 stackshot_kcdata_free_unused(kcdata_descriptor_t descriptor)
967 {
968 /*
969 * If we have free space at the end of the kcdata, we can add it to the
970 * freelist. We always add to *our* cluster's freelist, no matter where
971 * the data was originally allocated.
972 *
973 * Important Note: We do not use kcdata_memory_get_used_bytes here because
974 * that includes extra space for the end tag (which we do not care about).
975 */
976 int buffer;
977 size_t used_size = descriptor->kcd_addr_end - descriptor->kcd_addr_begin;
978 size_t free_size = (descriptor->kcd_length - used_size);
979 if (free_size > 0) {
980 #if defined(__arm64__)
981 buffer = cpu_cluster_id();
982 #else /* __arm64__ */
983 buffer = 0;
984 #endif /* !__arm64__ */
985 stackshot_buffer_free((void*) descriptor->kcd_addr_end, &stackshot_ctx.sc_buffers[buffer], free_size);
986 descriptor->kcd_length = used_size;
987 }
988 }
989
990 /**
991 * The callback for linked kcdata, which is called when one of the kcdata
992 * buffers runs out of space. This allocates a new kcdata descriptor &
993 * buffer in the linked list and sets it up.
994 *
995 * When kcdata calls this callback, it takes the returned descriptor
996 * and copies it to its own descriptor (which will be the per-cpu kcdata
997 * descriptor, in the case of stackshot).
998 *
999 * --- Stackshot linked kcdata details ---
1000 * The way stackshot allocates kcdata buffers (in a non-panic context) is via
1001 * a basic bump allocator (see `stackshot_buffer_alloc`) and a linked list of
1002 * kcdata structures. The kcdata are allocated with a reasonable size based on
1003 * some system heuristics (or more if whatever is being pushed into the buffer
1004 * is larger). When the current kcdata buffer runs out of space, it calls this
1005 * callback, which allocates a new linked kcdata object at the tail of the
1006 * current list.
1007 *
1008 * The per-cpu `stackshot_kcdata_p` descriptor is the "tail" of the list, but
1009  * is not actually part of the linked list (this simplifies the implementation,
1010  * since it avoids changing every kcdata call and a bunch of kcdata code;
1011  * the current in-use descriptor is always in the same place this way).
1012  * When it is filled up and this callback is called, the
1013 * `stackshot_kcdata_p` descriptor is copied to the *actual* tail of the list
1014 * (in stackshot_cpu_ctx.scc_kcdata_tail), and a new linked kcdata struct is
1015 * allocated at the tail.
1016 */
1017 static kcdata_descriptor_t
1018 linked_kcdata_alloc_callback(kcdata_descriptor_t descriptor, size_t min_size)
1019 {
1020 kern_return_t error;
1021 linked_kcdata_descriptor_t new_kcdata = NULL;
1022
1023 /* This callback should ALWAYS be coming from our per-cpu kcdata. If not, something has gone horribly wrong.*/
1024 stackshot_panic_guard();
1025 assert(descriptor == stackshot_kcdata_p);
1026
1027 /* Free the unused space in the buffer and copy it to the tail of the linked kcdata list. */
1028 stackshot_kcdata_free_unused(descriptor);
1029 stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *descriptor;
1030
1031 /* Allocate another linked_kcdata and initialize it. */
1032 new_kcdata = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1033 if (error != KERN_SUCCESS) {
1034 return NULL;
1035 }
1036
1037 	/* It doesn't matter what we mark the data type as - we're throwing it away when we weave the data together anyway. */
1038 error = linked_kcdata_init(new_kcdata, min_size, KCDATA_BUFFER_BEGIN_STACKSHOT, descriptor->kcd_flags);
1039 if (error != KERN_SUCCESS) {
1040 return NULL;
1041 }
1042
1043 bzero(descriptor, sizeof(struct kcdata_descriptor));
1044 stackshot_cpu_ctx.scc_kcdata_tail->next = new_kcdata;
1045 stackshot_cpu_ctx.scc_kcdata_tail = new_kcdata;
1046
1047 return &new_kcdata->kcdata;
1048 }
1049
1050 /**
1051 * Allocates a new linked kcdata list for the current CPU and sets it up.
1052 * If there was a previous linked kcdata descriptor, you should call
1053 * `stackshot_finalize_linked_kcdata` first, or otherwise save it somewhere.
1054 */
1055 __result_use_check
1056 static kern_return_t
1057 stackshot_new_linked_kcdata(void)
1058 {
1059 kern_return_t error;
1060
1061 stackshot_panic_guard();
1062 assert(!stackshot_ctx.sc_panic_stackshot);
1063
1064 stackshot_cpu_ctx.scc_kcdata_head = stackshot_alloc(struct linked_kcdata_descriptor, &error);
1065 if (error != KERN_SUCCESS) {
1066 return error;
1067 }
1068
1069 kcd_exit_on_error(linked_kcdata_init(stackshot_cpu_ctx.scc_kcdata_head, 0,
1070 KCDATA_BUFFER_BEGIN_STACKSHOT,
1071 KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER | KCFLAG_ALLOC_CALLBACK));
1072
1073 stackshot_cpu_ctx.scc_kcdata_tail = stackshot_cpu_ctx.scc_kcdata_head;
1074 *stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_head->kcdata;
1075
1076 error_exit:
1077 return error;
1078 }
1079
1080 /**
1081 * Finalizes the current linked kcdata structure for the CPU by updating the
1082 * tail of the list with the per-cpu kcdata descriptor.
1083 */
1084 static void
1085 stackshot_finalize_linked_kcdata(void)
1086 {
1087 stackshot_panic_guard();
1088 assert(!stackshot_ctx.sc_panic_stackshot);
1089 stackshot_kcdata_free_unused(stackshot_kcdata_p);
1090 if (stackshot_cpu_ctx.scc_kcdata_tail != NULL) {
1091 stackshot_cpu_ctx.scc_kcdata_tail->kcdata = *stackshot_kcdata_p;
1092 }
1093 *stackshot_kcdata_p = (struct kcdata_descriptor){};
1094 }
1095
1096 /*
1097 * Initialize the mutex governing access to the stack snapshot subsystem
1098 * and other stackshot related bits.
1099 */
1100 __private_extern__ void
1101 stackshot_init(void)
1102 {
1103 mach_timebase_info_data_t timebase;
1104
1105 clock_timebase_info(&timebase);
1106 stackshot_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
1107
1108 max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
1109
1110 PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
1111 }
1112
1113 /*
1114 * Called with interrupts disabled after stackshot context has been
1115 * initialized.
1116 */
1117 static kern_return_t
1118 stackshot_trap(void)
1119 {
1120 kern_return_t rv;
1121
1122 #if defined(__x86_64__)
1123 /*
1124 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
1125 * operation, it's essential to apply mutual exclusion to the other when one
1126 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
1127 * capture CPUs.
1128 *
1129 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
1130 	 * allowed, so we check to ensure there is no rendezvous in progress before
1131 * trying to grab the lock (if there is, a deadlock will occur when we try to
1132 * grab the lock). This is accomplished by setting cpu_rendezvous_in_progress to
1133 * TRUE in the mp rendezvous action function. If stackshot_trap() is called by
1134 * a subordinate of the call chain within the mp rendezvous action, this flag will
1135 * be set and can be used to detect the inevitable deadlock that would occur
1136 * if this thread tried to grab the rendezvous lock.
1137 */
1138
1139 if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
1140 panic("Calling stackshot from a rendezvous is not allowed!");
1141 }
1142
1143 mp_rendezvous_lock();
1144 #endif
1145
1146 stackshot_stats.last_abs_start = mach_absolute_time();
1147 stackshot_stats.last_abs_end = 0;
1148
1149 rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0, NULL);
1150
1151 stackshot_stats.last_abs_end = mach_absolute_time();
1152 stackshot_stats.stackshots_taken++;
1153 stackshot_stats.stackshots_duration += (stackshot_stats.last_abs_end - stackshot_stats.last_abs_start);
1154
1155 #if defined(__x86_64__)
1156 mp_rendezvous_unlock();
1157 #endif
1158 return rv;
1159 }
1160
1161 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
1162 void
1163 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
1164 {
1165 STACKSHOT_SUBSYS_LOCK();
1166 *last_abs_start = stackshot_stats.last_abs_start;
1167 *last_abs_end = stackshot_stats.last_abs_end;
1168 *count = stackshot_stats.stackshots_taken;
1169 *total_duration = stackshot_stats.stackshots_duration;
1170 STACKSHOT_SUBSYS_UNLOCK();
1171 }
1172
1173 kern_return_t
1174 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
1175 {
1176 kern_return_t error = KERN_SUCCESS;
1177 boolean_t istate;
1178 struct kdp_snapshot_args args;
1179
1180 args = (struct kdp_snapshot_args) {
1181 .pid = pid,
1182 .buffer = buf,
1183 .buffer_size = size,
1184 .flags = flags,
1185 .since_timestamp = delta_since_timestamp,
1186 .pagetable_mask = pagetable_mask
1187 };
1188
1189 #if DEVELOPMENT || DEBUG
1190 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
1191 return KERN_NOT_SUPPORTED;
1192 }
1193 #endif
1194 if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
1195 return KERN_INVALID_ARGUMENT;
1196 }
1197
1198 /* zero caller's buffer to match KMA_ZERO in other path */
1199 bzero(buf, size);
1200
1201 	/* cap an individual stackshot to max_tracebuf_size */
1202 if (size > max_tracebuf_size) {
1203 size = max_tracebuf_size;
1204 }
1205
1206 /* Serialize tracing */
1207 if (flags & STACKSHOT_TRYLOCK) {
1208 if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
1209 return KERN_LOCK_OWNED;
1210 }
1211 } else {
1212 STACKSHOT_SUBSYS_LOCK();
1213 }
1214
1215 #if CONFIG_EXCLAVES
1216 assert(!stackshot_exclave_inspect_ctids);
1217 #endif
1218
1219 stackshot_initial_estimate = 0;
1220 stackshot_duration_prior_abs = 0;
1221 stackshot_duration_outer = NULL;
1222
1223 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
1224 flags, size, pid, delta_since_timestamp);
1225
1226 /* Prepare the compressor for a stackshot */
1227 error = vm_compressor_kdp_init();
1228 if (error != KERN_SUCCESS) {
1229 return error;
1230 }
1231
1232 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
1233 // Opportunistically collect reports of the rdar://126582377 failure.
1234 // If the allocation doesn't succeed, or if another CPU "steals" the
1235 // allocated event first, that is acceptable.
1236 ca_event_t new_event = CA_EVENT_ALLOCATE_FLAGS(bad_stackshot_upper16, Z_NOWAIT);
1237 if (new_event) {
1238 if (os_atomic_cmpxchg(&rdar_126582377_event, NULL, new_event, relaxed) == 0) {
1239 // Already set up, so free it
1240 CA_EVENT_DEALLOCATE(new_event);
1241 }
1242 }
1243 #endif
1244
1245 istate = ml_set_interrupts_enabled(FALSE);
1246 uint64_t time_start = mach_absolute_time();
1247
1248 /* Emit a SOCD tracepoint that we are initiating a stackshot */
1249 SOCD_TRACE_XNU_START(STACKSHOT);
1250
1251 	/* Preload trace parameters */
1252 error = kdp_snapshot_preflight_internal(args);
1253
1254 /*
1255 * Trap to the debugger to obtain a coherent stack snapshot; this populates
1256 * the trace buffer
1257 */
1258 if (error == KERN_SUCCESS) {
1259 error = stackshot_trap();
1260 }
1261
1262 uint64_t time_end = mach_absolute_time();
1263
1264 /* Emit a SOCD tracepoint that we have completed the stackshot */
1265 SOCD_TRACE_XNU_END(STACKSHOT);
1266
1267 ml_set_interrupts_enabled(istate);
1268
1269 #if CONFIG_EXCLAVES
1270 /* stackshot trap should only finish successfully or with no pending Exclave threads */
1271 assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
1272 #endif
1273
1274 /*
1275 * Stackshot is no longer active.
1276 * (We have to do this here for the special interrupt disable timeout case to work)
1277 */
1278 os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
1279
1280 /* Release kdp compressor buffers */
1281 vm_compressor_kdp_teardown();
1282
1283 /* Collect multithreaded kcdata into one finalized buffer */
1284 if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
1285 error = stackshot_collect_kcdata();
1286 }
1287
1288 #if CONFIG_EXCLAVES
1289 if (error == KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
1290 error = collect_exclave_threads(flags);
1291 }
1292 #endif /* CONFIG_EXCLAVES */
1293
1294 if (error == KERN_SUCCESS) {
1295 if (!stackshot_ctx.sc_is_singlethreaded) {
1296 error = stackshot_finalize_kcdata();
1297 } else {
1298 error = stackshot_finalize_singlethreaded_kcdata();
1299 }
1300 }
1301
1302 if (stackshot_duration_outer) {
1303 *stackshot_duration_outer = time_end - time_start;
1304 }
1305 *bytes_traced = kdp_stack_snapshot_bytes_traced();
1306
1307 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
1308 error, (time_end - time_start), size, *bytes_traced);
1309
1310 STACKSHOT_SUBSYS_UNLOCK();
1311 return error;
1312 }
1313
1314 #if CONFIG_TELEMETRY
1315 kern_return_t
1316 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
1317 {
1318 int error = KERN_FAILURE;
1319 uint32_t bytes_traced = 0;
1320
1321 /*
1322 	 * "Flags" is actually treated as an enumeration; make sure only one value
1323 * is passed at a time.
1324 */
1325 bool set_mark = flags & STACKSHOT_SET_MICROSTACKSHOT_MARK;
1326 flags &= ~STACKSHOT_SET_MICROSTACKSHOT_MARK;
1327 if (__builtin_popcount(flags) != 1) {
1328 return KERN_INVALID_ARGUMENT;
1329 }
1330
1331 /*
1332 * Ensure that there's space to copyout to.
1333 */
1334 if (tracebuf == USER_ADDR_NULL || tracebuf_size == 0) {
1335 return KERN_INVALID_ARGUMENT;
1336 }
1337
1338 STACKSHOT_SUBSYS_LOCK();
1339
1340 switch (flags) {
1341 case STACKSHOT_GET_KERNEL_MICROSTACKSHOT:
1342 /*
1343 * Kernel samples consume from their buffer, so using a mark is the only
1344 * allowed option.
1345 */
1346 if (!set_mark) {
1347 error = KERN_INVALID_ARGUMENT;
1348 break;
1349 }
1350 bytes_traced = tracebuf_size;
1351 error = telemetry_kernel_gather(tracebuf, &bytes_traced);
1352 *retval = (int)bytes_traced;
1353 break;
1354 case STACKSHOT_GET_MICROSTACKSHOT: {
1355 if (tracebuf_size > max_tracebuf_size) {
1356 error = KERN_INVALID_ARGUMENT;
1357 break;
1358 }
1359
1360 bytes_traced = tracebuf_size;
1361 error = telemetry_gather(tracebuf, &bytes_traced, set_mark);
1362 *retval = (int)bytes_traced;
1363 break;
1364 }
1365 default:
1366 error = KERN_NOT_SUPPORTED;
1367 break;
1368 }
1369
1370 STACKSHOT_SUBSYS_UNLOCK();
1371 return error;
1372 }
1373 #endif /* CONFIG_TELEMETRY */
1374
1375 /**
1376 * Grabs the next work item from the stackshot work queue.
1377 */
1378 static struct stackshot_workitem *
1379 stackshot_get_workitem(struct stackshot_workqueue *queue)
1380 {
1381 uint32_t old_count, new_count;
1382
1383 /* note: this relies on give_up not performing the write, just bailing out immediately */
1384 os_atomic_rmw_loop(&queue->sswq_cur_item, old_count, new_count, acq_rel, {
1385 if (old_count >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1386 os_atomic_rmw_loop_give_up(return NULL);
1387 }
1388 new_count = old_count + 1;
1389 });
1390
1391 return &queue->sswq_items[old_count];
1392 };
1393
1394 /**
1395 * Puts an item on the appropriate stackshot work queue.
1396 * We don't need the lock for this, but only because it's
1397  * only called by one writer.
1398  *
1399  * @returns
1400  *      KERN_SUCCESS if the item fit in the queue, KERN_INSUFFICIENT_BUFFER_SIZE if not.
1401 */
1402 static kern_return_t
1403 stackshot_put_workitem(struct stackshot_workitem item)
1404 {
1405 struct stackshot_workqueue *queue;
1406
1407 /* Put in higher queue if task has more threads, with highest queue having >= STACKSHOT_HARDEST_THREADCOUNT threads */
1408 size_t queue_idx = ((item.sswi_task->thread_count * (STACKSHOT_NUM_WORKQUEUES - 1)) / STACKSHOT_HARDEST_THREADCOUNT);
1409 queue_idx = MIN(queue_idx, STACKSHOT_NUM_WORKQUEUES - 1);
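	/*
	 * With two workqueues (the __AMP__ case), this puts tasks with fewer than
	 * STACKSHOT_HARDEST_THREADCOUNT threads in queue 0 and everything else in
	 * queue 1; with a single workqueue the index is always 0.
	 */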
1410
1411 queue = &stackshot_ctx.sc_workqueues[queue_idx];
1412
1413 size_t num_items = os_atomic_load(&queue->sswq_num_items, relaxed);
1414
1415 if (num_items >= queue->sswq_capacity) {
1416 return KERN_INSUFFICIENT_BUFFER_SIZE;
1417 }
1418
1419 queue->sswq_items[num_items] = item;
1420 os_atomic_inc(&queue->sswq_num_items, release);
1421
1422 return KERN_SUCCESS;
1423 }
1424
1425 #define calc_num_linked_kcdata_frames(size, kcdata_size) (1 + ((size) - 1) / (kcdata_size))
1426 #define calc_linked_kcdata_size(size, kcdata_size) (calc_num_linked_kcdata_frames((size), (kcdata_size)) * ((kcdata_size) + sizeof(struct linked_kcdata_descriptor)))
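/*
 * Worked example (illustrative): with 8 KiB kcdata buffers, a 16 KiB payload
 * needs calc_num_linked_kcdata_frames(16384, 8192) == 2 frames, and
 * calc_linked_kcdata_size() charges two buffers plus two
 * struct linked_kcdata_descriptor headers for it.
 */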
1427
1428 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
1429 #define TASK_SHARED_CACHE_AVG_SIZE (128) /* Average space consumed by task shared cache info */
1430 #define sizeof_if_traceflag(a, flag) (((trace_flags & (flag)) != 0) ? sizeof(a) : 0)
1431
1432 #define FUDGED_SIZE(size, adj) (((size) * ((adj) + 100)) / 100)
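/*
 * e.g. FUDGED_SIZE(size, 25) evaluates to (size * 125) / 100, i.e. the
 * estimate padded by 25% (the default stackshot_estimate_adj).
 */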
1433
1434 /*
1435 * Return the estimated size of a single task (including threads)
1436 * in a stackshot with the given flags.
1437 */
1438 static uint32_t
1439 get_stackshot_est_tasksize(uint64_t trace_flags)
1440 {
1441 size_t total_size;
1442 size_t threads_per_task = (((threads_count + terminated_threads_count) - 1) / (tasks_count + terminated_tasks_count)) + 1;
1443 size_t est_thread_size = sizeof(struct thread_snapshot_v4) + 42 * sizeof(uintptr_t);
1444 size_t est_task_size = sizeof(struct task_snapshot_v2) +
1445 TASK_UUID_AVG_SIZE +
1446 TASK_SHARED_CACHE_AVG_SIZE +
1447 sizeof_if_traceflag(struct io_stats_snapshot, STACKSHOT_INSTRS_CYCLES) +
1448 sizeof_if_traceflag(uint32_t, STACKSHOT_ASID) +
1449 sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ, STACKSHOT_PAGE_TABLES) +
1450 sizeof_if_traceflag(struct instrs_cycles_snapshot_v2, STACKSHOT_INSTRS_CYCLES) +
1451 sizeof(struct stackshot_cpu_architecture) +
1452 sizeof(struct stackshot_task_codesigning_info);
1453
1454 #if STACKSHOT_COLLECTS_LATENCY_INFO
1455 if (collect_latency_info) {
1456 est_thread_size += sizeof(struct stackshot_latency_thread);
1457 est_task_size += sizeof(struct stackshot_latency_task);
1458 }
1459 #endif
1460
1461 total_size = est_task_size + threads_per_task * est_thread_size;
1462
1463 return total_size;
1464 }
1465
1466 /*
1467 * Return the estimated size of a stackshot based on the
1468 * number of currently running threads and tasks.
1469 *
1470 * adj is an adjustment in units of percentage
1471 */
1472 static uint32_t
1473 get_stackshot_estsize(
1474 uint32_t prev_size_hint,
1475 uint32_t adj,
1476 uint64_t trace_flags,
1477 pid_t target_pid)
1478 {
1479 vm_size_t thread_and_task_total;
1480 uint64_t size;
1481 uint32_t estimated_size;
1482 bool process_scoped = ((target_pid != -1) && ((trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0));
1483
1484 /*
1485 * We use the estimated task size (with a fudge factor) as the default
1486 * linked kcdata buffer size in an effort to reduce overhead (ideally, we want
1487 * each task to only need a single kcdata buffer.)
1488 */
1489 uint32_t est_task_size = get_stackshot_est_tasksize(trace_flags);
1490 uint32_t est_kcdata_size = FUDGED_SIZE(est_task_size, adj);
1491 uint64_t est_preamble_size = calc_linked_kcdata_size(8192 * 4, est_kcdata_size);
1492 uint64_t est_postamble_size = calc_linked_kcdata_size(8192 * 2, est_kcdata_size);
1493 uint64_t est_extra_size = 0;
1494
1495 adj = MIN(adj, 100u); /* no more than double our estimate */
1496
1497 #if STACKSHOT_COLLECTS_LATENCY_INFO
1498 est_extra_size += real_ncpus * sizeof(struct stackshot_latency_cpu);
1499 est_extra_size += sizeof(struct stackshot_latency_collection_v2);
1500 #endif
1501
1502 est_extra_size += real_ncpus * MAX_FRAMES * sizeof(uintptr_t); /* Stacktrace buffers */
1503 est_extra_size += FUDGED_SIZE(tasks_count, 10) * sizeof(uintptr_t) * STACKSHOT_NUM_WORKQUEUES; /* Work queues */
1504 est_extra_size += sizeof_if_traceflag(sizeof(uintptr_t) * STACKSHOT_PAGETABLE_BUFSZ * real_ncpus, STACKSHOT_PAGE_TABLES);
1505
1506 thread_and_task_total = calc_linked_kcdata_size(est_task_size, est_kcdata_size);
1507 if (!process_scoped) {
1508 thread_and_task_total *= tasks_count;
1509 }
1510 size = thread_and_task_total + est_preamble_size + est_postamble_size + est_extra_size; /* estimate */
1511 size = FUDGED_SIZE(size, adj); /* add adj */
1512 size = MAX(size, prev_size_hint); /* allow hint to increase */
1513 size += stackshot_plh_est_size(); /* add space for the port label hash */
1514 size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
1515 estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
1516
1517 return estimated_size;
1518 }
1519
1520 /**
1521 * Copies a linked list of kcdata structures into a final kcdata structure.
1522 * Only used from stackshot_finalize_kcdata.
1523 */
1524 __result_use_check
1525 static kern_return_t
1526 stackshot_copy_linked_kcdata(kcdata_descriptor_t final_kcdata, linked_kcdata_descriptor_t linked_kcdata)
1527 {
1528 kern_return_t error = KERN_SUCCESS;
1529
1530 while (linked_kcdata) {
1531 /* Walk linked kcdata list */
1532 kcdata_descriptor_t cur_kcdata = &linked_kcdata->kcdata;
1533 if ((cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin) == 0) {
1534 linked_kcdata = linked_kcdata->next;
1535 continue;
1536 }
1537
1538 /* Every item in the linked kcdata should have a header tag of type KCDATA_BUFFER_BEGIN_STACKSHOT. */
1539 assert(((struct kcdata_item*) cur_kcdata->kcd_addr_begin)->type == KCDATA_BUFFER_BEGIN_STACKSHOT);
1540 assert((final_kcdata->kcd_addr_begin + final_kcdata->kcd_length) > final_kcdata->kcd_addr_end);
1541 size_t header_size = sizeof(kcdata_item_t) + kcdata_calc_padding(sizeof(kcdata_item_t));
1542 size_t size = cur_kcdata->kcd_addr_end - cur_kcdata->kcd_addr_begin - header_size;
1543 size_t free = (final_kcdata->kcd_length + final_kcdata->kcd_addr_begin) - final_kcdata->kcd_addr_end;
1544 if (free < size) {
1545 error = KERN_INSUFFICIENT_BUFFER_SIZE;
1546 goto error_exit;
1547 }
1548
1549 /* Just memcpy the data over (and compress if we need to.) */
1550 kcdata_compression_window_open(final_kcdata);
1551 error = kcdata_memcpy(final_kcdata, final_kcdata->kcd_addr_end, (void*) (cur_kcdata->kcd_addr_begin + header_size), size);
1552 if (error != KERN_SUCCESS) {
1553 goto error_exit;
1554 }
1555 final_kcdata->kcd_addr_end += size;
1556 kcdata_compression_window_close(final_kcdata);
1557
1558 linked_kcdata = linked_kcdata->next;
1559 }
1560
1561 error_exit:
1562 return error;
1563 }
1564
1565 /**
1566 * Copies the duration, latency, and diagnostic info into a final kcdata buffer.
1567 * Only used by stackshot_finalize_kcdata and stackshot_finalize_singlethreaded_kcdata.
1568 */
1569 __result_use_check
1570 static kern_return_t
1571 stackshot_push_duration_and_latency(kcdata_descriptor_t kcdata)
1572 {
1573 kern_return_t error;
1574 mach_vm_address_t out_addr;
1575 bool use_fault_path = ((stackshot_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
1576 #if STACKSHOT_COLLECTS_LATENCY_INFO
1577 size_t buffer_used = 0;
1578 size_t buffer_overhead = 0;
1579 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1580
1581 if (use_fault_path) {
1582 struct stackshot_fault_stats stats = (struct stackshot_fault_stats) {
1583 .sfs_pages_faulted_in = 0,
1584 .sfs_time_spent_faulting = 0,
1585 .sfs_system_max_fault_time = stackshot_max_fault_time,
1586 .sfs_stopped_faulting = false
1587 };
1588 percpu_foreach_base(base) {
1589 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
1590 if (!cpu_ctx->scc_did_work) {
1591 continue;
1592 }
1593 stats.sfs_pages_faulted_in += cpu_ctx->scc_fault_stats.sfs_pages_faulted_in;
1594 stats.sfs_time_spent_faulting += cpu_ctx->scc_fault_stats.sfs_time_spent_faulting;
1595 stats.sfs_stopped_faulting = stats.sfs_stopped_faulting || cpu_ctx->scc_fault_stats.sfs_stopped_faulting;
1596 }
1597 kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
1598 sizeof(struct stackshot_fault_stats), &stats);
1599 }
1600
1601 #if STACKSHOT_COLLECTS_LATENCY_INFO
1602 int num_working_cpus = 0;
1603 if (collect_latency_info) {
1604 /* Add per-CPU latency info */
1605 percpu_foreach(cpu_ctx, stackshot_cpu_ctx_percpu) {
1606 if (cpu_ctx->scc_did_work) {
1607 num_working_cpus++;
1608 }
1609 }
1610 kcdata_compression_window_open(kcdata);
1611 kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1612 kcdata, STACKSHOT_KCTYPE_LATENCY_INFO_CPU, sizeof(struct stackshot_latency_cpu), num_working_cpus, &out_addr));
1613 percpu_foreach_base(base) {
1614 if (PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu)->scc_did_work) {
1615 kcdata_memcpy(kcdata, out_addr, PERCPU_GET_WITH_BASE(base, stackshot_cpu_latency_percpu),
1616 sizeof(struct stackshot_latency_cpu));
1617 out_addr += sizeof(struct stackshot_latency_cpu);
1618 }
1619 }
1620 kcd_exit_on_error(kcdata_compression_window_close(kcdata));
1621
1622 /* Add up buffer info */
1623 for (size_t buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
1624 struct stackshot_buffer *buf = &stackshot_ctx.sc_buffers[buf_idx];
1625 buffer_used += os_atomic_load(&buf->ssb_used, relaxed);
1626 buffer_overhead += os_atomic_load(&buf->ssb_overhead, relaxed);
1627 }
1628 stackshot_ctx.sc_latency.buffer_size = stackshot_ctx.sc_args.buffer_size;
1629 stackshot_ctx.sc_latency.buffer_overhead = buffer_overhead;
1630 stackshot_ctx.sc_latency.buffer_used = buffer_used;
1631 stackshot_ctx.sc_latency.buffer_count = stackshot_ctx.sc_num_buffers;
1632
1633 /* Add overall latency info */
1634 kcd_exit_on_error(kcdata_push_data(
1635 kcdata, STACKSHOT_KCTYPE_LATENCY_INFO,
1636 sizeof(stackshot_ctx.sc_latency), &stackshot_ctx.sc_latency));
1637 }
1638 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
1639
1640 if ((stackshot_flags & STACKSHOT_DO_COMPRESS) == 0) {
1641 assert(!stackshot_ctx.sc_panic_stackshot);
1642 kcd_exit_on_error(kcdata_get_memory_addr(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
1643 sizeof(struct stackshot_duration_v2), &out_addr));
1644 struct stackshot_duration_v2 *duration_p = (void *) out_addr;
1645 memcpy(duration_p, &stackshot_ctx.sc_duration, sizeof(*duration_p));
1646 stackshot_duration_outer = (unaligned_u64 *) &duration_p->stackshot_duration_outer;
1647 kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_tries, "stackshot_tries"));
1648 } else {
1649 kcd_exit_on_error(kcdata_push_data(kcdata, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_ctx.sc_duration), &stackshot_ctx.sc_duration));
1650 stackshot_duration_outer = NULL;
1651 }
1652
1653 error_exit:
1654 return error;
1655 }
1656
1657 /**
1658 * Allocates the final kcdata buffer for a multithreaded stackshot,
1659 * where all of the per-task kcdata (and exclave kcdata) will end up.
1660 */
1661 __result_use_check
1662 static kern_return_t
1663 stackshot_alloc_final_kcdata(void)
1664 {
1665 vm_offset_t final_kcdata_buffer = 0;
1666 kern_return_t error = KERN_SUCCESS;
1667 uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1668 : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
1669 : KCDATA_BUFFER_BEGIN_STACKSHOT;
1670
1671 if (stackshot_ctx.sc_is_singlethreaded) {
1672 return KERN_SUCCESS;
1673 }
1674
1675 if ((error = kmem_alloc(kernel_map, &final_kcdata_buffer, stackshot_args.buffer_size,
1676 KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
1677 os_log_error(OS_LOG_DEFAULT, "stackshot: final allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, stackshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
1678 return KERN_RESOURCE_SHORTAGE;
1679 }
1680
1681 stackshot_ctx.sc_finalized_kcdata = kcdata_memory_alloc_init(final_kcdata_buffer, hdr_tag,
1682 stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
1683
1684 if (stackshot_ctx.sc_finalized_kcdata == NULL) {
1685 kmem_free(kernel_map, final_kcdata_buffer, stackshot_args.buffer_size);
1686 return KERN_FAILURE;
1687 }
1688
1689 return KERN_SUCCESS;
1690 }
1691
1692 /**
1693 * Frees the final kcdata buffer.
1694 */
1695 static void
1696 stackshot_free_final_kcdata(void)
1697 {
1698 if (stackshot_ctx.sc_is_singlethreaded || (stackshot_ctx.sc_finalized_kcdata == NULL)) {
1699 return;
1700 }
1701
1702 kmem_free(kernel_map, stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1703 kcdata_memory_destroy(stackshot_ctx.sc_finalized_kcdata);
1704 stackshot_ctx.sc_finalized_kcdata = NULL;
1705 }
1706
1707 /**
1708 * Called once we exit the debugger trap to collate all of the separate linked
1709 * kcdata lists into one kcdata buffer. The calling thread will run this, and
1710 * it is guaranteed that nobody else is touching any stackshot state at this
1711 * point. In the case of a panic stackshot, this is never called since we only
1712 * use one thread.
1713 *
1714 * Called with interrupts enabled, stackshot subsys lock held.
1715 */
1716 __result_use_check
1717 static kern_return_t
1718 stackshot_collect_kcdata(void)
1719 {
1720 kern_return_t error = 0;
1721 uint32_t hdr_tag;
1722
1723 assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1724 LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1725
1726 /* Allocate our final kcdata buffer. */
1727 kcd_exit_on_error(stackshot_alloc_final_kcdata());
1728 assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1729
1730 /* Setup compression if we need it. */
1731 if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
1732 hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
1733 : KCDATA_BUFFER_BEGIN_STACKSHOT;
1734 kcd_exit_on_error(kcdata_init_compress(stackshot_ctx.sc_finalized_kcdata, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
1735 }
1736
1737 /* Copy over all of the pre task-iteration kcdata (to preserve order as if it were single-threaded) */
1738 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_pretask_kcdata));
1739
1740 /* Set each queue's cur_item to 0. */
1741 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1742 os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_cur_item, 0, relaxed);
1743 }
1744
1745 /*
1746 * Merge the work queues, copying each item's kcdata in ascending sswi_idx order to preserve single-threaded ordering.
1747 */
1748 while (true) {
1749 struct stackshot_workitem *next_item = NULL;
1750 struct stackshot_workqueue *next_queue = NULL;
1751 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
1752 struct stackshot_workqueue *queue = &stackshot_ctx.sc_workqueues[i];
1753 size_t cur_item = os_atomic_load(&queue->sswq_cur_item, relaxed);
1754
1755 /* Check if we're done with this queue */
1756 if (cur_item >= os_atomic_load(&queue->sswq_num_items, relaxed)) {
1757 continue;
1758 }
1759
1760 /* Check if this workitem should come next */
1761 struct stackshot_workitem *item = &queue->sswq_items[cur_item];
1762 if ((next_item == NULL) || (next_item->sswi_idx > item->sswi_idx)) {
1763 next_item = item;
1764 next_queue = queue;
1765 }
1766 }
1767
1768 /* Queues are empty. */
1769 if (next_item == NULL) {
1770 break;
1771 }
1772
1773 assert(next_queue);
1774 assert(next_item->sswi_data != NULL);
1775
1776 os_atomic_inc(&next_queue->sswq_cur_item, relaxed);
1777 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, next_item->sswi_data));
1778 }
1779
1780 /* Write post-task kcdata */
1781 kcd_exit_on_error(stackshot_copy_linked_kcdata(stackshot_ctx.sc_finalized_kcdata, stackshot_ctx.sc_posttask_kcdata));
1782 error_exit:
1783 if (error != KERN_SUCCESS) {
1784 stackshot_free_final_kcdata();
1785 }
1786 return error;
1787 }
1788
1789
1790 /**
1791 * Called at the very end of stackshot data generation, to write final timing
1792 * data to the kcdata structure and close compression. Only called for
1793 * multi-threaded stackshots; see stackshot_finalize_singlethreaded_kcdata for
1794 * single-threaded variant.
1795 *
1796 * Called with interrupts enabled, stackshot subsys lock held.
1797 */
1798 __result_use_check
1799 static kern_return_t
1800 stackshot_finalize_kcdata(void)
1801 {
1802 kern_return_t error = 0;
1803
1804 assert(!stackshot_ctx.sc_panic_stackshot && !stackshot_ctx.sc_is_singlethreaded);
1805 LCK_MTX_ASSERT(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
1806
1807 assert(stackshot_ctx.sc_finalized_kcdata != NULL);
1808
1809 /* Write stackshot timing info */
1810 kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1811
1812 /* Note: at most one call that pushes additional data may be made after kcd_finalize_compression */
1813 kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1814 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1815 kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1816
1817 stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1818 stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1819
1820 if (os_atomic_load(&stackshot_ctx.sc_retval, relaxed) == KERN_SUCCESS) {
1821 /* releases and zeros done */
1822 kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1823 }
1824
1825 memcpy(stackshot_args.buffer, (void*) stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin, stackshot_args.buffer_size);
1826
1827 /* Fix duration_outer offset */
1828 if (stackshot_duration_outer != NULL) {
1829 stackshot_duration_outer = (unaligned_u64*) ((mach_vm_address_t) stackshot_args.buffer + ((mach_vm_address_t) stackshot_duration_outer - stackshot_ctx.sc_finalized_kcdata->kcd_addr_begin));
1830 }
1831
1832 error_exit:
1833 stackshot_free_final_kcdata();
1834 return error;
1835 }
1836
1837 /**
1838 * Finalizes the kcdata for a singlethreaded stackshot.
1839 *
1840 * May be called from interrupt/panic context.
1841 */
1842 __result_use_check
1843 static kern_return_t
1844 stackshot_finalize_singlethreaded_kcdata(void)
1845 {
1846 kern_return_t error;
1847
1848 assert(stackshot_ctx.sc_is_singlethreaded);
1849
1850 kcd_exit_on_error(stackshot_push_duration_and_latency(stackshot_ctx.sc_finalized_kcdata));
1851 /* Note: at most one call that pushes additional data may be made after kcd_finalize_compression */
1852 kcd_finalize_compression(stackshot_ctx.sc_finalized_kcdata);
1853 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_ctx.sc_finalized_kcdata, stackshot_flags, "stackshot_out_flags"));
1854 kcd_exit_on_error(kcdata_write_buffer_end(stackshot_ctx.sc_finalized_kcdata));
1855
1856 stackshot_ctx.sc_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_ctx.sc_finalized_kcdata);
1857 stackshot_ctx.sc_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_ctx.sc_finalized_kcdata);
1858
1859 kcd_exit_on_error(kcdata_finish(stackshot_ctx.sc_finalized_kcdata));
1860
1861 if (stackshot_ctx.sc_panic_stackshot) {
1862 *stackshot_args.descriptor = *stackshot_ctx.sc_finalized_kcdata;
1863 }
1864
1865 error_exit:
1866 return error;
1867 }
1868
1869 /*
1870 * stackshot_remap_buffer: Utility function to remap bytes_traced bytes starting at stackshotbuf
1871 * into the current task's user space and subsequently copy out the address
1872 * at which the buffer has been mapped in user space to out_buffer_addr.
1873 *
1874 * Inputs: stackshotbuf - pointer to the original buffer in the kernel's address space
1875 * bytes_traced - length of the buffer to remap starting from stackshotbuf
1876 * out_buffer_addr - user address where the address of the newly mapped buffer will be copied out
1877 * out_size_addr - pointer to be filled in with the size of the buffer
1878 *
1879 * Outputs: ENOSPC if there is not enough free space in the task's address space to remap the buffer
1880 * EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
1881 * an error from copyout
1882 */
1883 static kern_return_t
1884 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
1885 {
1886 int error = 0;
1887 mach_vm_offset_t stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
1888 vm_prot_t cur_prot = VM_PROT_NONE, max_prot = VM_PROT_NONE;
1889
1890 error = mach_vm_remap(current_map(), &stackshotbuf_user_addr, bytes_traced, 0,
1891 VM_FLAGS_ANYWHERE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE,
1892 &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
1893 /*
1894 * If the call to mach_vm_remap fails, we return the appropriate converted error
1895 */
1896 if (error == KERN_SUCCESS) {
1897 /* If the user addr somehow didn't get set, we should make sure that we fail, and (eventually)
1898 * panic on development kernels to find out why
1899 */
1900 if (stackshotbuf_user_addr == (mach_vm_offset_t)NULL) {
1901 #if DEVELOPMENT || DEBUG
1902 os_log_error(OS_LOG_DEFAULT, "stackshot: mach_vm_remap succeeded with NULL\n");
1903 #endif // DEVELOPMENT || DEBUG
1904 return KERN_FAILURE;
1905 }
1906
1907 /*
1908 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
1909 * we just made in the task's user space.
1910 */
1911 error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
1912 if (error != KERN_SUCCESS) {
1913 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1914 return error;
1915 }
1916 error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
1917 if (error != KERN_SUCCESS) {
1918 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
1919 return error;
1920 }
1921 }
1922 return error;
1923 }
1924
1925 #if CONFIG_EXCLAVES
1926
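/*
 * Allocate the waitlist of exclave thread IDs (ctids), sized for the maximum
 * number of exclave IPC buffers. Threads found in exclaves during the
 * stackshot are recorded here so they can be inspected after the trap.
 */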
1927 static kern_return_t
1928 stackshot_setup_exclave_waitlist(void)
1929 {
1930 kern_return_t error = KERN_SUCCESS;
1931 size_t exclave_threads_max = exclaves_ipc_buffer_count();
1932 size_t waitlist_size = 0;
1933
1934 assert(!stackshot_exclave_inspect_ctids);
1935
1936 if (exclaves_inspection_is_initialized() && exclave_threads_max) {
1937 if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
1938 error = KERN_INVALID_ARGUMENT;
1939 goto error;
1940 }
1941 stackshot_exclave_inspect_ctids = stackshot_alloc_with_size(waitlist_size, &error);
1942 if (!stackshot_exclave_inspect_ctids) {
1943 goto error;
1944 }
1945 stackshot_exclave_inspect_ctid_count = 0;
1946 stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
1947 }
1948
1949 error:
1950 return error;
1951 }
1952
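/*
 * Queue the recorded exclave threads for inspection, wait for the exclave
 * collection thread to do the work on our behalf, and wrap the resulting
 * kcdata in a STACKSHOT_KCCONTAINER_EXCLAVES container.
 */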
1953 static kern_return_t
1954 collect_exclave_threads(uint64_t ss_flags)
1955 {
1956 size_t i;
1957 ctid_t ctid;
1958 thread_t thread;
1959 kern_return_t kr = KERN_SUCCESS;
1960 STACKSHOT_SUBSYS_ASSERT_LOCKED();
1961
1962 lck_mtx_lock(&exclaves_collect_mtx);
1963
1964 if (stackshot_exclave_inspect_ctid_count == 0) {
1965 /* Nothing to do */
1966 goto out;
1967 }
1968
1969 // When asking for ASIDs, make sure we get all exclaves asids and mappings as well
1970 exclaves_stackshot_raw_addresses = (ss_flags & STACKSHOT_ASID);
1971 exclaves_stackshot_all_address_spaces = (ss_flags & (STACKSHOT_ASID | STACKSHOT_EXCLAVES));
1972
1973 /* This error is intentionally ignored: we are now committed to collecting
1974 * these threads, or at least properly waking them. If this fails, the first
1975 * collected thread should also fail to append to the kcdata, and will abort
1976 * further collection, properly clearing the AST and waking these threads.
1977 */
1978 kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_BEGIN,
1979 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1980
1981 for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
1982 ctid = stackshot_exclave_inspect_ctids[i];
1983 thread = ctid_get_thread(ctid);
1984 assert(thread);
1985 exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
1986 }
1987 exclaves_inspection_begin_collecting();
1988 exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
1989 kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
1990 if (kr != KERN_SUCCESS) {
1991 goto out;
1992 }
1993
1994 kr = kcdata_add_container_marker(stackshot_ctx.sc_finalized_kcdata, KCDATA_TYPE_CONTAINER_END,
1995 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
1996 if (kr != KERN_SUCCESS) {
1997 goto out;
1998 }
1999 out:
2000 /* clear Exclave buffer now that it's been used */
2001 stackshot_exclave_inspect_ctids = NULL;
2002 stackshot_exclave_inspect_ctid_capacity = 0;
2003 stackshot_exclave_inspect_ctid_count = 0;
2004
2005 lck_mtx_unlock(&exclaves_collect_mtx);
2006 return kr;
2007 }
2008
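/*
 * Copy an optional exclave stack trace into a kcdata array of
 * exclave_ecstackentry_addr_t entries, within a compression window.
 */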
2009 static kern_return_t
2010 stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
2011 {
2012 kern_return_t error = KERN_SUCCESS;
2013 exclave_ecstackentry_addr_t * addr = NULL;
2014 __block size_t count = 0;
2015
2016 if (!st->has_value) {
2017 goto error_exit;
2018 }
2019
2020 address__v_visit(&st->value, ^(size_t __unused i, const stackshottypes_address_s __unused item) {
2021 count++;
2022 });
2023
2024 kcdata_compression_window_open(kcdata_ptr);
2025 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
2026 sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));
2027
2028 address__v_visit(&st->value, ^(size_t i, const stackshottypes_address_s item) {
2029 addr[i] = (exclave_ecstackentry_addr_t)item;
2030 });
2031
2032 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2033
2034 error_exit:
2035 return error;
2036 }
2037
2038 static kern_return_t
2039 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshottypes_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
2040 {
2041 kern_return_t error = KERN_SUCCESS;
2042
2043 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2044 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2045
2046 struct exclave_ipcstackentry_info info = { 0 };
2047 info.eise_asid = ise->asid;
2048
2049 info.eise_tnid = ise->tnid;
2050
2051 if (ise->invocationid.has_value) {
2052 info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
2053 info.eise_invocationid = ise->invocationid.value;
2054 } else {
2055 info.eise_invocationid = 0;
2056 }
2057
2058 info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
2059
2060 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
2061
2062 if (ise->stacktrace.has_value) {
2063 kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
2064 }
2065
2066 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2067 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
2068
2069 error_exit:
2070 return error;
2071 }
2072
2073 static kern_return_t
2074 stackshot_exclaves_process_ipcstack(const stackshottypes_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
2075 {
2076 __block kern_return_t kr = KERN_SUCCESS;
2077
2078 if (!ipcstack->has_value) {
2079 goto error_exit;
2080 }
2081
2082 stackshottypes_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshottypes_ipcstackentry_s *_Nonnull item) {
2083 if (kr == KERN_SUCCESS) {
2084 kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
2085 }
2086 });
2087
2088 error_exit:
2089 return kr;
2090 }
2091
2092 static kern_return_t
2093 stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
2094 {
2095 kern_return_t error = KERN_SUCCESS;
2096
2097 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2098 STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2099
2100 struct exclave_scresult_info info = { 0 };
2101 info.esc_id = se->scid;
2102 info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;
2103
2104 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));
2105
2106 if (se->ipcstack.has_value) {
2107 kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
2108 }
2109
2110 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2111 STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));
2112
2113 error_exit:
2114 return error;
2115 }
2116
2117 static kern_return_t
2118 stackshot_exclaves_process_textlayout_segments(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2119 {
2120 kern_return_t error = KERN_SUCCESS;
2121 __block struct exclave_textlayout_segment_v2 * info = NULL;
2122
2123 __block size_t count = 0;
2124 stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s __unused *_Nonnull item) {
2125 count++;
2126 });
2127
2128 if (!count) {
2129 goto error_exit;
2130 }
2131
2132 kcdata_compression_window_open(kcdata_ptr);
2133 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
2134 sizeof(struct exclave_textlayout_segment_v2), count, (mach_vm_address_t*)&info));
2135
2136 stackshottypes_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshottypes_textsegment_s *_Nonnull item) {
2137 memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
2138 info->layoutSegment_loadAddress = item->loadaddress;
2139 if (want_raw_addresses) {
2140 info->layoutSegment_rawLoadAddress = item->rawloadaddress.has_value ? item->rawloadaddress.value: 0;
2141 } else {
2142 info->layoutSegment_rawLoadAddress = 0;
2143 }
2144 info++;
2145 });
2146
2147 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2148
2149 error_exit:
2150 return error;
2151 }
2152
2153 static kern_return_t
2154 stackshot_exclaves_process_textlayout(const stackshottypes_textlayout_s *_Nonnull tl, void *kcdata_ptr, bool want_raw_addresses)
2155 {
2156 kern_return_t error = KERN_SUCCESS;
2157 __block struct exclave_textlayout_info info = { 0 };
2158
2159 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2160 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2161
2162 // tightbeam optional interfaces don't have enough const.
2163 u32__opt_s sharedcacheindex_opt = tl->sharedcacheindex;
2164 const uint32_t *sharedcache_index = u32__opt_get(&sharedcacheindex_opt);
2165
2166 info.layout_id = tl->textlayoutid;
2167
2168 info.etl_flags =
2169 (want_raw_addresses ? 0 : kExclaveTextLayoutLoadAddressesUnslid) |
2170 (sharedcache_index == NULL ? 0 : kExclaveTextLayoutHasSharedCache);
2171 info.sharedcache_index = (sharedcache_index == NULL) ? UINT32_MAX : *sharedcache_index;
2172
2173 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
2174 kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr, want_raw_addresses));
2175 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2176 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, tl->textlayoutid));
2177 error_exit:
2178 return error;
2179 }
2180
2181 static kern_return_t
2182 stackshot_exclaves_process_addressspace(const stackshottypes_addressspace_s *_Nonnull as, void *kcdata_ptr, bool want_raw_addresses)
2183 {
2184 kern_return_t error = KERN_SUCCESS;
2185 struct exclave_addressspace_info info = { 0 };
2186 __block size_t name_len = 0;
2187 uint8_t * name = NULL;
2188
2189 u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
2190 name_len++;
2191 });
2192
2193 info.eas_id = as->asid;
2194
2195 if (want_raw_addresses && as->rawaddressslide.has_value) {
2196 info.eas_flags = kExclaveAddressSpaceHaveSlide;
2197 info.eas_slide = as->rawaddressslide.value;
2198 } else {
2199 info.eas_flags = 0;
2200 info.eas_slide = UINT64_MAX;
2201 }
2202
2203 info.eas_layoutid = as->textlayoutid; // text layout for this address space
2204 info.eas_asroot = as->asroot.has_value ? as->asroot.value : 0;
2205
2206 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
2207 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2208 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
2209
2210 if (name_len > 0) {
2211 kcdata_compression_window_open(kcdata_ptr);
2212 kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
2213
2214 u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
2215 name[i] = item;
2216 });
2217 name[name_len] = 0;
2218
2219 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
2220 }
2221
2222 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
2223 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
2224 error_exit:
2225 return error;
2226 }
2227
2228 kern_return_t
2229 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses);
2230
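/*
 * Walk an exclave stackshot result, emitting each stackshot entry, address
 * space, and text layout into the kcdata buffer; stops at the first error.
 */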
2231 kern_return_t
2232 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr, bool want_raw_addresses)
2233 {
2234 __block kern_return_t kr = KERN_SUCCESS;
2235
2236 stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
2237 if (kr == KERN_SUCCESS) {
2238 kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
2239 }
2240 });
2241
2242 stackshottypes_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshottypes_addressspace_s *_Nonnull item) {
2243 if (kr == KERN_SUCCESS) {
2244 kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr, want_raw_addresses);
2245 }
2246 });
2247
2248 stackshottypes_textlayout__v_visit(&result->textlayouts, ^(size_t __unused i, const stackshottypes_textlayout_s *_Nonnull item) {
2249 if (kr == KERN_SUCCESS) {
2250 kr = stackshot_exclaves_process_textlayout(item, kcdata_ptr, want_raw_addresses);
2251 }
2252 });
2253
2254 return kr;
2255 }
2256
2257 kern_return_t
2258 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses);
2259
2260 kern_return_t
2261 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result, bool want_raw_addresses)
2262 {
2263 kern_return_t kr = KERN_SUCCESS;
2264 if (result == NULL) {
2265 return collect_kr;
2266 }
2267
2268 kr = stackshot_exclaves_process_stackshot(result, stackshot_ctx.sc_finalized_kcdata, want_raw_addresses);
2269
2270 stackshot_exclave_kr = kr;
2271
2272 return kr;
2273 }
2274
2275
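/*
 * Called from debugger context: take a reference on each thread in the
 * exclave inspection list and set its stackshot inspection bit so the
 * exclave inspection machinery will process it after the trap returns.
 */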
2276 static void
2277 commit_exclaves_ast(void)
2278 {
2279 size_t i = 0;
2280 thread_t thread = NULL;
2281 size_t count;
2282
2283 assert(debug_mode_active());
2284
2285 count = os_atomic_load(&stackshot_exclave_inspect_ctid_count, acquire);
2286
2287 if (stackshot_exclave_inspect_ctids) {
2288 for (i = 0; i < count; ++i) {
2289 thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
2290 assert(thread);
2291 thread_reference(thread);
2292 os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
2293 }
2294 }
2295 }
2296
2297 #endif /* CONFIG_EXCLAVES */
2298
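/*
 * Top-level entry point for user- and kernel-initiated stackshots: validates
 * the config, estimates a buffer size, takes the stackshot with interrupts
 * disabled, and retries with a larger buffer on KERN_INSUFFICIENT_BUFFER_SIZE
 * up to max_tracebuf_size.
 *
 * Informal usage sketch (kernel/KEXT path; config fields as handled in the
 * STACKSHOT_CONFIG_TYPE case below):
 *
 *   stackshot_config_t cfg = {
 *       .sc_pid = -1,                 // all tasks
 *       .sc_flags = STACKSHOT_KCDATA_FORMAT | STACKSHOT_SAVE_IN_KERNEL_BUFFER,
 *       .sc_delta_timestamp = 0,
 *   };
 *   kern_return_t kr = kern_stack_snapshot_internal(STACKSHOT_CONFIG_TYPE,
 *       &cfg, sizeof(cfg), FALSE);
 */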
2299 kern_return_t
2300 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
2301 {
2302 int error = 0;
2303 boolean_t prev_interrupt_state;
2304 bool did_copyout = false;
2305 uint32_t bytes_traced = 0;
2306 uint32_t stackshot_estimate = 0;
2307 struct kdp_snapshot_args snapshot_args;
2308
2309 void * buf_to_free = NULL;
2310 int size_to_free = 0;
2311 bool is_traced = false; /* has FUNC_START tracepoint fired? */
2312 uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */
2313
2314 /* Parsed arguments */
2315 uint64_t out_buffer_addr;
2316 uint64_t out_size_addr;
2317 uint32_t size_hint = 0;
2318
2319 snapshot_args.pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;
2320
2321 if (stackshot_config == NULL) {
2322 return KERN_INVALID_ARGUMENT;
2323 }
2324 #if DEVELOPMENT || DEBUG
2325 /* TBD: ask stackshot clients to avoid issuing stackshots in this
2326 * configuration in lieu of the kernel feature override.
2327 */
2328 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
2329 return KERN_NOT_SUPPORTED;
2330 }
2331 #endif
2332
2333 switch (stackshot_config_version) {
2334 case STACKSHOT_CONFIG_TYPE:
2335 if (stackshot_config_size != sizeof(stackshot_config_t)) {
2336 return KERN_INVALID_ARGUMENT;
2337 }
2338 stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
2339 out_buffer_addr = config->sc_out_buffer_addr;
2340 out_size_addr = config->sc_out_size_addr;
2341 snapshot_args.pid = config->sc_pid;
2342 snapshot_args.flags = config->sc_flags;
2343 snapshot_args.since_timestamp = config->sc_delta_timestamp;
2344 if (config->sc_size <= max_tracebuf_size) {
2345 size_hint = config->sc_size;
2346 }
2347 /*
2348 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
2349 * dump every level if the pagetable_mask is not set
2350 */
2351 if (snapshot_args.flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
2352 snapshot_args.pagetable_mask = config->sc_pagetable_mask;
2353 }
2354 break;
2355 default:
2356 return KERN_NOT_SUPPORTED;
2357 }
2358
2359 /*
2360 * Currently saving a kernel buffer and trylock are only supported from the
2361 * internal/KEXT API.
2362 */
2363 if (stackshot_from_user) {
2364 if (snapshot_args.flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
2365 return KERN_NO_ACCESS;
2366 }
2367 #if !DEVELOPMENT && !DEBUG
2368 if (snapshot_args.flags & (STACKSHOT_DO_COMPRESS)) {
2369 return KERN_NO_ACCESS;
2370 }
2371 #endif
2372 } else {
2373 if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2374 return KERN_NOT_SUPPORTED;
2375 }
2376 }
2377
2378 if (!((snapshot_args.flags & STACKSHOT_KCDATA_FORMAT) || (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
2379 return KERN_NOT_SUPPORTED;
2380 }
2381
2382 /* Compressed delta stackshots or page dumps are not yet supported */
2383 if (((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (snapshot_args.flags & STACKSHOT_PAGE_TABLES))
2384 && (snapshot_args.flags & STACKSHOT_DO_COMPRESS)) {
2385 return KERN_NOT_SUPPORTED;
2386 }
2387
2388 /*
2389 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
2390 */
2391 if ((!out_buffer_addr || !out_size_addr) && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2392 return KERN_INVALID_ARGUMENT;
2393 }
2394
2395 if (snapshot_args.since_timestamp != 0 && ((snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
2396 return KERN_INVALID_ARGUMENT;
2397 }
2398
2399 /* EXCLAVES and SKIP_EXCLAVES conflict */
2400 if ((snapshot_args.flags & (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) == (STACKSHOT_EXCLAVES | STACKSHOT_SKIP_EXCLAVES)) {
2401 return KERN_INVALID_ARGUMENT;
2402 }
2403
2404 #if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
2405 if (!mt_core_supported) {
2406 snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2407 }
2408 #else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
2409 snapshot_args.flags &= ~STACKSHOT_INSTRS_CYCLES;
2410 #endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */
2411
2412 STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
2413 STACKSHOT_SUBSYS_LOCK();
2414
2415 stackshot_tries = 0;
2416
2417 if (snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
2418 /*
2419 * Don't overwrite an existing stackshot
2420 */
2421 if (kernel_stackshot_buf != NULL) {
2422 error = KERN_MEMORY_PRESENT;
2423 goto error_early_exit;
2424 }
2425 } else if (snapshot_args.flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
2426 if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
2427 error = KERN_NOT_IN_SET;
2428 goto error_early_exit;
2429 }
2430 error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
2431 out_buffer_addr, out_size_addr);
2432 /*
2433 * If we successfully remapped the buffer into the user's address space, we
2434 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
2435 * and then clear the kernel stackshot pointer and associated size.
2436 */
2437 if (error == KERN_SUCCESS) {
2438 did_copyout = true;
2439 buf_to_free = kernel_stackshot_buf;
2440 size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
2441 kernel_stackshot_buf = NULL;
2442 kernel_stackshot_buf_size = 0;
2443 }
2444
2445 goto error_early_exit;
2446 }
2447
2448 if (snapshot_args.flags & STACKSHOT_GET_BOOT_PROFILE) {
2449 void *bootprofile = NULL;
2450 uint32_t len = 0;
2451 #if CONFIG_TELEMETRY
2452 bootprofile_get(&bootprofile, &len);
2453 #endif
2454 if (!bootprofile || !len) {
2455 error = KERN_NOT_IN_SET;
2456 goto error_early_exit;
2457 }
2458 error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
2459 if (error == KERN_SUCCESS) {
2460 did_copyout = true;
2461 }
2462 goto error_early_exit;
2463 }
2464
2465 stackshot_duration_prior_abs = 0;
2466 stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
2467 snapshot_args.buffer_size = stackshot_estimate =
2468 get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj, snapshot_args.flags, snapshot_args.pid);
2469 stackshot_initial_estimate = stackshot_estimate;
2470
2471 // ensure at least one attempt, even if the initial size from estimate was too big
2472 snapshot_args.buffer_size = MIN(snapshot_args.buffer_size, max_tracebuf_size);
2473
2474 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
2475 snapshot_args.flags, snapshot_args.buffer_size, snapshot_args.pid, snapshot_args.since_timestamp);
2476 is_traced = true;
2477
2478 #if CONFIG_EXCLAVES
2479 assert(!stackshot_exclave_inspect_ctids);
2480 #endif
2481
2482 for (; snapshot_args.buffer_size <= max_tracebuf_size; snapshot_args.buffer_size = MIN(snapshot_args.buffer_size << 1, max_tracebuf_size)) {
2483 stackshot_tries++;
2484 if ((error = kmem_alloc(kernel_map, (vm_offset_t *)&snapshot_args.buffer, snapshot_args.buffer_size,
2485 KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG)) != KERN_SUCCESS) {
2486 os_log_error(OS_LOG_DEFAULT, "stackshot: initial allocation failed: %d, allocating %u bytes of %u max, try %llu\n", (int)error, snapshot_args.buffer_size, max_tracebuf_size, stackshot_tries);
2487 error = KERN_RESOURCE_SHORTAGE;
2488 goto error_exit;
2489 }
2490
2491 uint32_t hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2492 : (snapshot_args.flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2493 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2494 #pragma unused(hdr_tag)
2495
2496 stackshot_duration_outer = NULL;
2497
2498 /* if compression was requested, allocate the extra zlib scratch area */
2499 if (snapshot_args.flags & STACKSHOT_DO_COMPRESS) {
2500 hdr_tag = (snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2501 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2502 if (error != KERN_SUCCESS) {
2503 os_log_error(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
2504 (int) error);
2505 goto error_exit;
2506 }
2507 }
2508
2509 /* Prepare the compressor for a stackshot */
2510 error = vm_compressor_kdp_init();
2511 if (error != KERN_SUCCESS) {
2512 goto error_exit;
2513 }
2514
2515 /*
2516 * Disable interrupts and save the current interrupt state.
2517 */
2518 prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
2519 uint64_t time_start = mach_absolute_time();
2520
2521 /* Emit a SOCD tracepoint that we are initiating a stackshot */
2522 SOCD_TRACE_XNU_START(STACKSHOT);
2523
2524 /*
2525 * Load stackshot parameters.
2526 */
2527 error = kdp_snapshot_preflight_internal(snapshot_args);
2528
2529 if (error == KERN_SUCCESS) {
2530 error = stackshot_trap();
2531 }
2532
2533 /* Emit a SOCD tracepoint that we have completed the stackshot */
2534 SOCD_TRACE_XNU_END(STACKSHOT);
2535 ml_set_interrupts_enabled(prev_interrupt_state);
2536
2537 #if CONFIG_EXCLAVES
2538 /* stackshot trap should only finish successfully or with no pending Exclave threads */
2539 assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
2540 #endif
2541
2542 /*
2543 * Stackshot is no longer active.
2544 * (We have to do this here for the special interrupt disable timeout case to work)
2545 */
2546 os_atomic_store(&stackshot_ctx.sc_state, SS_INACTIVE, release);
2547
2548 /* Release compressor kdp buffers */
2549 vm_compressor_kdp_teardown();
2550
2551 /* Record duration that interrupts were disabled */
2552 uint64_t time_end = mach_absolute_time();
2553 tot_interrupts_off_abs += (time_end - time_start);
2554
2555 /* Collect multithreaded kcdata into one finalized buffer */
2556 if (error == KERN_SUCCESS && !stackshot_ctx.sc_is_singlethreaded) {
2557 error = stackshot_collect_kcdata();
2558 }
2559
2560 #if CONFIG_EXCLAVES
2561 if (error == KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
2562 if (stackshot_exclave_inspect_ctid_count > 0) {
2563 STACKSHOT_TESTPOINT(TP_START_COLLECTION);
2564 }
2565 error = collect_exclave_threads(snapshot_args.flags);
2566 }
2567 #endif /* CONFIG_EXCLAVES */
2568
2569 if (error == KERN_SUCCESS) {
2570 if (stackshot_ctx.sc_is_singlethreaded) {
2571 error = stackshot_finalize_singlethreaded_kcdata();
2572 } else {
2573 error = stackshot_finalize_kcdata();
2574 }
2575
2576 if ((error != KERN_SUCCESS) && (error != KERN_INSUFFICIENT_BUFFER_SIZE)) {
2577 goto error_exit;
2578 }
2579 if (error == KERN_INSUFFICIENT_BUFFER_SIZE && snapshot_args.buffer_size == max_tracebuf_size) {
2580 os_log_error(OS_LOG_DEFAULT, "stackshot: final buffer size was insufficient at maximum size\n");
2581 error = KERN_RESOURCE_SHORTAGE;
2582 goto error_exit;
2583 }
2584 }
2585
2586 /* record the duration that interrupts were disabled + kcdata was being finalized */
2587 if (stackshot_duration_outer) {
2588 *stackshot_duration_outer = mach_absolute_time() - time_start;
2589 }
2590
2591 if (error != KERN_SUCCESS) {
2592 os_log_error(OS_LOG_DEFAULT, "stackshot: debugger call failed: %d, try %llu, buffer %u estimate %u\n", (int)error, stackshot_tries, snapshot_args.buffer_size, stackshot_estimate);
2593 kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2594 snapshot_args.buffer = NULL;
2595 if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
2596 /*
2597 * If we didn't allocate a big enough buffer, deallocate and try again.
2598 */
2599 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
2600 time_end - time_start, stackshot_estimate, snapshot_args.buffer_size);
2601 stackshot_duration_prior_abs += (time_end - time_start);
2602 if (snapshot_args.buffer_size == max_tracebuf_size) {
2603 os_log_error(OS_LOG_DEFAULT, "stackshot: initial buffer size was insufficient at maximum size\n");
2604 error = KERN_RESOURCE_SHORTAGE;
2605 goto error_exit;
2606 }
2607 continue;
2608 } else {
2609 goto error_exit;
2610 }
2611 }
2612
2613 bytes_traced = kdp_stack_snapshot_bytes_traced();
2614 if (bytes_traced <= 0) {
2615 error = KERN_ABORTED;
2616 goto error_exit;
2617 }
2618
2619 if (!(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
2620 error = stackshot_remap_buffer(snapshot_args.buffer, bytes_traced, out_buffer_addr, out_size_addr);
2621 if (error == KERN_SUCCESS) {
2622 did_copyout = true;
2623 }
2624 goto error_exit;
2625 }
2626
2627 if (!(snapshot_args.flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT)) {
2628 os_log_info(OS_LOG_DEFAULT, "stackshot: succeeded, traced %u bytes to %u buffer (estimate %u) try %llu\n", bytes_traced, snapshot_args.buffer_size, stackshot_estimate, stackshot_tries);
2629 }
2630
2631 /*
2632 * Save the stackshot in the kernel buffer.
2633 */
2634 kernel_stackshot_buf = snapshot_args.buffer;
2635 kernel_stackshot_buf_size = bytes_traced;
2636 /*
2637 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
2638 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
2639 * update size_to_free for kmem_free accordingly.
2640 */
2641 size_to_free = snapshot_args.buffer_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
2642
2643 assert(size_to_free >= 0);
2644
2645 if (size_to_free != 0) {
2646 buf_to_free = (void *)((uint64_t)snapshot_args.buffer + snapshot_args.buffer_size - size_to_free);
2647 }
2648
2649 snapshot_args.buffer = NULL;
2650 snapshot_args.buffer_size = 0;
2651 goto error_exit;
2652 }
2653
2654 error_exit:
2655 if (is_traced) {
2656 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
2657 error, tot_interrupts_off_abs, snapshot_args.buffer_size, bytes_traced);
2658 }
2659
2660 error_early_exit:
2661 if (snapshot_args.buffer != NULL) {
2662 kmem_free(kernel_map, (vm_offset_t)snapshot_args.buffer, snapshot_args.buffer_size);
2663 }
2664 if (buf_to_free != NULL) {
2665 kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
2666 }
2667
2668 if (error == KERN_SUCCESS && !(snapshot_args.flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) && !did_copyout) {
2669 /* If we return success, we must have done the copyout to userspace. If
2670 * we somehow did not, we need to indicate failure instead.
2671 */
2672 #if DEVELOPMENT || DEBUG
2673 os_log_error(OS_LOG_DEFAULT, "stackshot: reached end without doing copyout\n");
2674 #endif // DEVELOPMENT || DEBUG
2675 error = KERN_FAILURE;
2676 }
2677
2678 STACKSHOT_SUBSYS_UNLOCK();
2679 STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);
2680
2681 return error;
2682 }
2683
2684 /*
2685 * Set up state and parameters for a stackshot.
2686 * (This runs on the calling CPU before other CPUs enter the debugger trap.)
2687 * Called when interrupts are disabled, but we're not in the debugger trap yet.
2688 */
2689 __result_use_check
2690 static kern_return_t
2691 kdp_snapshot_preflight_internal(struct kdp_snapshot_args args)
2692 {
2693 kern_return_t error = KERN_SUCCESS;
2694 uint64_t microsecs = 0, secs = 0;
2695 bool is_panic = ((args.flags & STACKSHOT_FROM_PANIC) != 0);
2696 bool process_scoped = (args.pid != -1) &&
2697 ((args.flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
2698 bool is_singlethreaded = stackshot_single_thread || (process_scoped || is_panic || ((args.flags & STACKSHOT_PAGE_TABLES) != 0));
2699 clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
2700
2701 cur_stackshot_ctx_idx = (is_panic ? STACKSHOT_CTX_IDX_PANIC : STACKSHOT_CTX_IDX_NORMAL);
2702
2703 /* Setup overall state */
2704 stackshot_ctx = (struct stackshot_context) {
2705 .sc_args = args,
2706 .sc_state = SS_SETUP,
2707 .sc_bytes_traced = 0,
2708 .sc_bytes_uncompressed = 0,
2709 .sc_microsecs = microsecs + (secs * USEC_PER_SEC),
2710 .sc_panic_stackshot = is_panic,
2711 .sc_is_singlethreaded = is_singlethreaded,
2712 .sc_cpus_working = 0,
2713 .sc_retval = 0,
2714 .sc_calling_cpuid = cpu_number(),
2715 .sc_main_cpuid = is_singlethreaded ? cpu_number() : -1,
2716 .sc_min_kcdata_size = get_stackshot_est_tasksize(args.flags),
2717 .sc_enable_faulting = false,
2718 };
2719
2720 if (!stackshot_ctx.sc_panic_stackshot) {
2721 #if defined(__AMP__)
2722 /* On AMP systems, we want to split the buffers up by cluster to avoid cache line effects. */
2723 stackshot_ctx.sc_num_buffers = is_singlethreaded ? 1 : ml_get_cluster_count();
2724 #else /* __AMP__ */
2725 stackshot_ctx.sc_num_buffers = 1;
2726 #endif /* !__AMP__ */
2727 size_t bufsz = args.buffer_size / stackshot_ctx.sc_num_buffers;
2728 for (int buf_idx = 0; buf_idx < stackshot_ctx.sc_num_buffers; buf_idx++) {
2729 stackshot_ctx.sc_buffers[buf_idx] = (struct stackshot_buffer) {
2730 .ssb_ptr = (void*) ((mach_vm_address_t) args.buffer + (bufsz * buf_idx)),
2731 .ssb_size = bufsz,
2732 .ssb_used = 0,
2733 .ssb_freelist = NULL,
2734 .ssb_freelist_lock = 0,
2735 .ssb_overhead = 0
2736 };
2737 }
2738
2739 /* Setup per-cpu state */
2740 percpu_foreach_base(base) {
2741 *PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu) = (struct stackshot_cpu_context) { 0 };
2742 }
2743
2744 if (is_singlethreaded) {
2745 /* If the stackshot is singlethreaded, set up the kcdata - we don't bother with linked-list kcdata in singlethreaded mode. */
2746 uint32_t hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2747 : (stackshot_flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
2748 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2749 kcdata_memory_static_init(stackshot_kcdata_p, (mach_vm_address_t) stackshot_args.buffer, hdr_tag,
2750 stackshot_args.buffer_size, KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
2751 if (stackshot_flags & STACKSHOT_DO_COMPRESS) {
2752 hdr_tag = (stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
2753 : KCDATA_BUFFER_BEGIN_STACKSHOT;
2754 kcd_exit_on_error(kcdata_init_compress(stackshot_kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB));
2755 }
2756 stackshot_cpu_ctx.scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES);
2757 }
2758 } else {
2759 /*
2760 * If this is a panic stackshot, we need to handle things differently.
2761 * The panic code hands us a kcdata descriptor to work with instead of
2762 * us making one ourselves.
2763 */
2764 *stackshot_kcdata_p = *stackshot_args.descriptor;
2765 stackshot_cpu_ctx = (struct stackshot_cpu_context) {
2766 .scc_can_work = true,
2767 .scc_stack_buffer = kcdata_endalloc(stackshot_kcdata_p, sizeof(uintptr_t) * MAX_FRAMES)
2768 };
2769 #if STACKSHOT_COLLECTS_LATENCY_INFO
2770 *(PERCPU_GET(stackshot_trace_buffer)) = (struct stackshot_trace_buffer) {};
2771 #endif
2772 }
2773
2774 /* Set up our cpu state */
2775 stackshot_cpu_preflight();
2776
2777 error_exit:
2778 return error;
2779 }
2780
2781 /*
2782 * The old function signature for kdp_snapshot_preflight, used in the panic path.
2783 * Called when interrupts are disabled, but we're not in the debugger trap yet.
2784 */
2785 void
2786 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
2787 kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
2788 {
2789 __assert_only kern_return_t err;
2790 err = kdp_snapshot_preflight_internal((struct kdp_snapshot_args) {
2791 .pid = pid,
2792 .buffer = tracebuf,
2793 .buffer_size = tracebuf_size,
2794 .flags = flags,
2795 .descriptor = data_p,
2796 .since_timestamp = since_timestamp,
2797 .pagetable_mask = pagetable_mask
2798 });
2799
2800
2801 /* This shouldn't ever return an error in the panic path. */
2802 assert(err == KERN_SUCCESS);
2803 }
2804
2805 static void
2806 stackshot_reset_state(void)
2807 {
2808 stackshot_ctx = (struct stackshot_context) { 0 };
2809 }
2810
2811 void
2812 panic_stackshot_reset_state(void)
2813 {
2814 stackshot_reset_state();
2815 }
2816
2817 boolean_t
2818 stackshot_active(void)
2819 {
2820 return os_atomic_load(&stackshot_ctx.sc_state, relaxed) != SS_INACTIVE;
2821 }
2822
2823 boolean_t
2824 panic_stackshot_active(void)
2825 {
2826 return os_atomic_load(&stackshot_contexts[STACKSHOT_CTX_IDX_PANIC].sc_state, relaxed) != SS_INACTIVE;
2827 }
2828
2829 uint32_t
2830 kdp_stack_snapshot_bytes_traced(void)
2831 {
2832 return stackshot_ctx.sc_bytes_traced;
2833 }
2834
2835 uint32_t
2836 kdp_stack_snapshot_bytes_uncompressed(void)
2837 {
2838 return stackshot_ctx.sc_bytes_uncompressed;
2839 }
2840
2841 static boolean_t
2842 memory_iszero(void *addr, size_t size)
2843 {
2844 char *data = (char *)addr;
2845 for (size_t i = 0; i < size; i++) {
2846 if (data[i] != 0) {
2847 return FALSE;
2848 }
2849 }
2850 return TRUE;
2851 }
2852
2853 static void
2854 _stackshot_validation_reset(void)
2855 {
2856 percpu_foreach_base(base) {
2857 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2858 cpu_ctx->scc_validation_state.last_valid_page_kva = -1;
2859 cpu_ctx->scc_validation_state.last_valid_size = 0;
2860 }
2861 }
2862
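/*
 * Validate that [addr, addr + size) can be read without faulting, caching the
 * most recently validated page per CPU to short-circuit repeated checks.
 */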
2863 static bool
2864 _stackshot_validate_kva(vm_offset_t addr, size_t size)
2865 {
2866 vm_offset_t page_addr = atop_kernel(addr);
2867 if (stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva == page_addr &&
2868 stackshot_cpu_ctx.scc_validation_state.last_valid_size <= size) {
2869 return true;
2870 }
2871
2872 if (ml_validate_nofault(addr, size)) {
2873 stackshot_cpu_ctx.scc_validation_state.last_valid_page_kva = page_addr;
2874 stackshot_cpu_ctx.scc_validation_state.last_valid_size = size;
2875 return true;
2876 }
2877 return false;
2878 }
2879
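/*
 * strlen() for possibly-unmapped kernel memory: validates each byte before
 * reading it and returns -1 if the string is unmapped or longer than maxlen.
 */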
2880 static long
2881 _stackshot_strlen(const char *s, size_t maxlen)
2882 {
2883 size_t len = 0;
2884 for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
2885 if (*s == 0) {
2886 return len;
2887 }
2888 if (len >= maxlen) {
2889 return -1;
2890 }
2891 }
2892 return -1; /* failed before end of string */
2893 }
2894
2895
2896 static size_t
2897 stackshot_plh_est_size(void)
2898 {
2899 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
2900 size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
2901
2902 if (size == 0) {
2903 return 0;
2904 }
2905 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
2906 return SIZE_EST(size * sizeof(*plh->plh_array)) +
2907 SIZE_EST(size * sizeof(*plh->plh_chains)) +
2908 SIZE_EST(size * sizeof(*stackshot_cpu_ctx.scc_plh_gen.pgs_gen) * real_ncpus) +
2909 SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
2910 #undef SIZE_EST
2911 }
2912
2913 static void
2914 stackshot_plh_reset(void)
2915 {
2916 stackshot_ctx.sc_plh = (struct port_label_hash){.plh_size = 0}; /* structure assignment */
2917 }
2918
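/*
 * Allocate the port label hash (plh) used to assign compact IDs to service
 * port labels seen during the stackshot. Allocation failures are tolerated:
 * the plh_bad counter is bumped and the stackshot proceeds without labels.
 */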
2919 static kern_return_t
2920 stackshot_plh_setup(void)
2921 {
2922 kern_return_t error;
2923 size_t size;
2924 bool percpu_alloc_failed = false;
2925 struct port_label_hash plh = {
2926 .plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
2927 .plh_count = 0,
2928 };
2929
2930 stackshot_plh_reset();
2931
2932 percpu_foreach_base(base) {
2933 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2934 cpu_ctx->scc_plh_gen = (struct _stackshot_plh_gen_state){
2935 .pgs_gen = NULL,
2936 .pgs_curgen = 1,
2937 .pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX,
2938 .pgs_curgen_max = 0,
2939 };
2940 }
2941
2942 size = plh.plh_size;
2943 if (size == 0) {
2944 return KERN_SUCCESS;
2945 }
2946 plh.plh_array = stackshot_alloc_with_size(size * sizeof(*plh.plh_array), &error);
2947 plh.plh_chains = stackshot_alloc_with_size(size * sizeof(*plh.plh_chains), &error);
2948 percpu_foreach_base(base) {
2949 struct stackshot_cpu_context *cpu_ctx = PERCPU_GET_WITH_BASE(base, stackshot_cpu_ctx_percpu);
2950 cpu_ctx->scc_plh_gen.pgs_gen = stackshot_alloc_with_size(size * sizeof(*cpu_ctx->scc_plh_gen.pgs_gen), &error);
2951 if (cpu_ctx->scc_plh_gen.pgs_gen == NULL) {
2952 percpu_alloc_failed = true;
2953 break;
2954 }
2955 for (int x = 0; x < size; x++) {
2956 cpu_ctx->scc_plh_gen.pgs_gen[x] = 0;
2957 }
2958 }
2959 plh.plh_hash = stackshot_alloc_with_size((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash), &error);
2960 if (error != KERN_SUCCESS) {
2961 return error;
2962 }
2963 if (plh.plh_array == NULL || plh.plh_chains == NULL || percpu_alloc_failed || plh.plh_hash == NULL) {
2964 PLH_STAT_OP(os_atomic_inc(&stackshot_ctx.sc_plh.plh_bad, relaxed));
2965 return KERN_SUCCESS;
2966 }
2967 for (int x = 0; x < size; x++) {
2968 plh.plh_array[x] = NULL;
2969 plh.plh_chains[x] = -1;
2970 }
2971 for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
2972 plh.plh_hash[x] = -1;
2973 }
2974 stackshot_ctx.sc_plh = plh; /* structure assignment */
2975 return KERN_SUCCESS;
2976 }
2977
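/*
 * Hash an ipc_service_port_label pointer down to STACKSHOT_PLH_SHIFT bits by
 * XOR-folding shifted copies of the pointer onto itself and masking the low
 * bits. The guard in PLH_HASH_STEP drops steps whose shift would meet or
 * exceed the pointer width; e.g. (assuming STACKSHOT_PLH_SHIFT were 7) the
 * x == 16 step would shift by 112 bits and therefore contributes 0.
 */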
2978 static int16_t
2979 stackshot_plh_hash(struct ipc_service_port_label *ispl)
2980 {
2981 uintptr_t ptr = (uintptr_t)ispl;
2982 static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
2983 #define PLH_HASH_STEP(ptr, x) \
2984 ((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
2985 ptr ^= PLH_HASH_STEP(ptr, 16);
2986 ptr ^= PLH_HASH_STEP(ptr, 8);
2987 ptr ^= PLH_HASH_STEP(ptr, 4);
2988 ptr ^= PLH_HASH_STEP(ptr, 2);
2989 ptr ^= PLH_HASH_STEP(ptr, 1);
2990 #undef PLH_HASH_STEP
2991 return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
2992 }
2993
2994 enum stackshot_plh_lookup_type {
2995 STACKSHOT_PLH_LOOKUP_UNKNOWN,
2996 STACKSHOT_PLH_LOOKUP_SEND,
2997 STACKSHOT_PLH_LOOKUP_RECEIVE,
2998 };
2999
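/*
 * Generation tracking: each PLH slot carries a per-CPU generation stamp so
 * kdp_stackshot_plh_record_locked() can emit only the labels referenced since
 * the previous stackshot_plh_resetgen() call (typically once per task).
 * pgs_curgen_min/pgs_curgen_max bracket the touched indices to keep that walk
 * short; when the 8-bit generation wraps, the stamp array is zeroed again.
 */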
3000 static void
3001 stackshot_plh_resetgen(void)
3002 {
3003 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3004 uint16_t plh_size = stackshot_ctx.sc_plh.plh_size;
3005
3006 if (pgs->pgs_curgen_min == STACKSHOT_PLH_SIZE_MAX && pgs->pgs_curgen_max == 0) {
3007 return; // no lookups, nothing using the current generation
3008 }
3009 pgs->pgs_curgen++;
3010 pgs->pgs_curgen_min = STACKSHOT_PLH_SIZE_MAX;
3011 pgs->pgs_curgen_max = 0;
3012 if (pgs->pgs_curgen == 0) { // wrapped, zero the array and increment the generation
3013 for (int x = 0; x < plh_size; x++) {
3014 pgs->pgs_gen[x] = 0;
3015 }
3016 pgs->pgs_curgen = 1;
3017 }
3018 }
3019
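/*
 * Look up (or insert) a label with the PLH lock held and return its 1-based
 * id. Collisions are chained through plh_chains; any inconsistency (index out
 * of range, chain longer than the table, bad chain terminator) is counted in
 * plh_bad and reported as STACKSHOT_PORTLABELID_MISSING rather than risking a
 * stray dereference in debugger context.
 */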
3020 static int16_t
3021 stackshot_plh_lookup_locked(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3022 {
3023 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3024 int depth;
3025 int16_t cur;
3026 if (ispl == NULL) {
3027 return STACKSHOT_PORTLABELID_NONE;
3028 }
3029 switch (type) {
3030 case STACKSHOT_PLH_LOOKUP_SEND:
3031 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_send, relaxed));
3032 break;
3033 case STACKSHOT_PLH_LOOKUP_RECEIVE:
3034 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookup_receive, relaxed));
3035 break;
3036 default:
3037 break;
3038 }
3039 PLH_STAT_OP(os_atomic_inc(&plh->plh_lookups, relaxed));
3040 if (plh->plh_size == 0) {
3041 return STACKSHOT_PORTLABELID_MISSING;
3042 }
3043 int16_t hash = stackshot_plh_hash(ispl);
3044 assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
3045 depth = 0;
3046 for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
3047 /* cur must be in-range, and chain depth can never be above our # allocated */
3048 if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
3049 PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3050 PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3051 return STACKSHOT_PORTLABELID_MISSING;
3052 }
3053 assert(cur < plh->plh_count);
3054 if (plh->plh_array[cur] == ispl) {
3055 PLH_STAT_OP(os_atomic_inc(&plh->plh_found, relaxed));
3056 PLH_STAT_OP(os_atomic_add(&plh->plh_found_depth, depth, relaxed));
3057 goto found;
3058 }
3059 depth++;
3060 }
3061 /* not found in hash table, so alloc and insert it */
3062 if (cur != -1) {
3063 PLH_STAT_OP(os_atomic_inc(&plh->plh_bad, relaxed));
3064 PLH_STAT_OP(os_atomic_add(&plh->plh_bad_depth, depth, relaxed));
3065 return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
3066 }
3067 PLH_STAT_OP(os_atomic_inc(&plh->plh_insert, relaxed));
3068 PLH_STAT_OP(os_atomic_add(&plh->plh_insert_depth, depth, relaxed));
3069 if (plh->plh_count >= plh->plh_size) {
3070 return STACKSHOT_PORTLABELID_MISSING; /* no space */
3071 }
3072 cur = plh->plh_count;
3073 plh->plh_count++;
3074 plh->plh_array[cur] = ispl;
3075 plh->plh_chains[cur] = plh->plh_hash[hash];
3076 plh->plh_hash[hash] = cur;
3077 found: ;
3078 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3079 pgs->pgs_gen[cur] = pgs->pgs_curgen;
3080 if (pgs->pgs_curgen_min > cur) {
3081 pgs->pgs_curgen_min = cur;
3082 }
3083 if (pgs->pgs_curgen_max < cur) {
3084 pgs->pgs_curgen_max = cur;
3085 }
3086 return cur + 1; /* offset to avoid 0 */
3087 }
3088
3089 static kern_return_t
3090 kdp_stackshot_plh_record_locked(void)
3091 {
3092 kern_return_t error = KERN_SUCCESS;
3093 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3094 struct _stackshot_plh_gen_state *pgs = &stackshot_cpu_ctx.scc_plh_gen;
3095 uint16_t count = plh->plh_count;
3096 uint8_t curgen = pgs->pgs_curgen;
3097 int16_t curgen_min = pgs->pgs_curgen_min;
3098 int16_t curgen_max = pgs->pgs_curgen_max;
3099 if (curgen_min <= curgen_max && curgen_max < count &&
3100 count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
3101 struct ipc_service_port_label **arr = plh->plh_array;
3102 size_t ispl_size, max_namelen;
3103 kdp_ipc_splabel_size(&ispl_size, &max_namelen);
3104 for (int idx = curgen_min; idx <= curgen_max; idx++) {
3105 struct ipc_service_port_label *ispl = arr[idx];
3106 struct portlabel_info spl = {
3107 .portlabel_id = (idx + 1),
3108 };
3109 const char *name = NULL;
3110 long name_sz = 0;
3111 if (pgs->pgs_gen[idx] != curgen) {
3112 continue;
3113 }
3114 if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
3115 kdp_ipc_fill_splabel(ispl, &spl, &name);
3116 #if STACKSHOT_COLLECTS_RDAR_126582377_DATA
3117 } else {
3118 if (ispl != NULL && (vm_offset_t)ispl >> 48 == 0x0000) {
3119 ca_event_t event_to_send = os_atomic_xchg(&rdar_126582377_event, NULL, relaxed);
3120 if (event_to_send) {
3121 CA_EVENT_SEND(event_to_send);
3122 }
3123 }
3124 #endif
3125 }
3126
3127 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3128 STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3129 if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) { /* validates the kva */
3130 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
3131 } else {
3132 spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
3133 }
3134 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
3135 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3136 STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
3137 }
3138 }
3139
3140 error_exit:
3141 return error;
3142 }
3143
3144 // record any PLH referenced since the last stackshot_plh_resetgen() call
3145 static kern_return_t
3146 kdp_stackshot_plh_record(void)
3147 {
3148 kern_return_t error;
3149 plh_lock(&stackshot_ctx.sc_plh);
3150 error = kdp_stackshot_plh_record_locked();
3151 plh_unlock(&stackshot_ctx.sc_plh);
3152 return error;
3153 }
3154
3155 static int16_t
3156 stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
3157 {
3158 int16_t result;
3159 plh_lock(&stackshot_ctx.sc_plh);
3160 result = stackshot_plh_lookup_locked(ispl, type);
3161 plh_unlock(&stackshot_ctx.sc_plh);
3162 return result;
3163 }
3164
3165 #if DEVELOPMENT || DEBUG
3166 static kern_return_t
3167 kdp_stackshot_plh_stats(void)
3168 {
3169 kern_return_t error = KERN_SUCCESS;
3170 struct port_label_hash *plh = &stackshot_ctx.sc_plh;
3171
3172 #define PLH_STAT(x) do { if (os_atomic_load(&plh->x, relaxed) != 0) { \
3173 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, os_atomic_load(&plh->x, relaxed), "stackshot_" #x)); \
3174 } } while (0)
3175 PLH_STAT(plh_size);
3176 PLH_STAT(plh_lookups);
3177 PLH_STAT(plh_found);
3178 PLH_STAT(plh_found_depth);
3179 PLH_STAT(plh_insert);
3180 PLH_STAT(plh_insert_depth);
3181 PLH_STAT(plh_bad);
3182 PLH_STAT(plh_bad_depth);
3183 PLH_STAT(plh_lookup_send);
3184 PLH_STAT(plh_lookup_receive);
3185 #undef PLH_STAT
3186
3187 error_exit:
3188 return error;
3189 }
3190 #endif /* DEVELOPMENT || DEBUG */
3191
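/*
 * Collapse assorted task state (address-space width, termination, suspension,
 * DarwinBG/role/boost policy, memorystatus dirtiness, workqueue state and
 * importance donation) into the ss_flags bitfield carried by task snapshots.
 */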
3192 static uint64_t
3193 kcdata_get_task_ss_flags(task_t task)
3194 {
3195 uint64_t ss_flags = 0;
3196 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3197 void *bsd_info = get_bsdtask_info(task);
3198
3199 if (task_64bit_addr) {
3200 ss_flags |= kUser64_p;
3201 }
3202 if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
3203 ss_flags |= kTerminatedSnapshot;
3204 }
3205 if (task->pidsuspended) {
3206 ss_flags |= kPidSuspended;
3207 }
3208 if (task->frozen) {
3209 ss_flags |= kFrozen;
3210 }
3211 if (task->effective_policy.tep_darwinbg == 1) {
3212 ss_flags |= kTaskDarwinBG;
3213 }
3214 if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
3215 ss_flags |= kTaskIsForeground;
3216 }
3217 if (task->requested_policy.trp_boosted == 1) {
3218 ss_flags |= kTaskIsBoosted;
3219 }
3220 if (task->effective_policy.tep_sup_active == 1) {
3221 ss_flags |= kTaskIsSuppressed;
3222 }
3223 #if CONFIG_MEMORYSTATUS
3224
3225 boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
3226 memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
3227 if (dirty) {
3228 ss_flags |= kTaskIsDirty;
3229 }
3230 if (dirty_tracked) {
3231 ss_flags |= kTaskIsDirtyTracked;
3232 }
3233 if (allow_idle_exit) {
3234 ss_flags |= kTaskAllowIdleExit;
3235 }
3236
3237 #endif
3238 if (task->effective_policy.tep_tal_engaged) {
3239 ss_flags |= kTaskTALEngaged;
3240 }
3241
3242 ss_flags |= workqueue_get_task_ss_flags_from_pwq_state_kdp(bsd_info);
3243
3244 #if IMPORTANCE_INHERITANCE
3245 if (task->task_imp_base) {
3246 if (task->task_imp_base->iit_donor) {
3247 ss_flags |= kTaskIsImpDonor;
3248 }
3249 if (task->task_imp_base->iit_live_donor) {
3250 ss_flags |= kTaskIsLiveImpDonor;
3251 }
3252 }
3253 #endif
3254 return ss_flags;
3255 }
3256
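/*
 * Record which dyld shared cache the task is attached to. The task's
 * ts_ss_flags always get a None/System/Other classification; non-primary
 * regions additionally emit a SHAREDCACHE_ID entry and, for backwards
 * compatibility, the legacy SHAREDCACHE_LOADINFO blob.
 */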
3257 static kern_return_t
3258 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
3259 {
3260 kern_return_t error = KERN_SUCCESS;
3261
3262 uint64_t shared_cache_slide = 0;
3263 uint64_t shared_cache_first_mapping = 0;
3264 uint32_t kdp_fault_results = 0;
3265 uint32_t shared_cache_id = 0;
3266 struct dyld_shared_cache_loadinfo shared_cache_data = {0};
3267
3268
3269 assert(task_snap_ss_flags != NULL);
3270
3271 /* Get basic info about the shared region pointer, regardless of any failures */
3272 if (task->shared_region == NULL) {
3273 *task_snap_ss_flags |= kTaskSharedRegionNone;
3274 } else if (task->shared_region == primary_system_shared_region) {
3275 *task_snap_ss_flags |= kTaskSharedRegionSystem;
3276 } else {
3277 *task_snap_ss_flags |= kTaskSharedRegionOther;
3278 }
3279
3280 if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
3281 struct vm_shared_region *sr = task->shared_region;
3282 shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
3283
3284 shared_cache_id = sr->sr_id;
3285 } else {
3286 *task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
3287 goto error_exit;
3288 }
3289
3290 /* We haven't copied in the shared region UUID yet as part of setup */
3291 if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
3292 goto error_exit;
3293 }
3294
3295
3296 /*
3297 * No refcounting here, but we are in debugger context, so that should be safe.
3298 */
3299 shared_cache_slide = task->shared_region->sr_slide;
3300
3301 if (task->shared_region == primary_system_shared_region) {
3302 /* skip adding shared cache info -- it's the same as the system level one */
3303 goto error_exit;
3304 }
3305 /*
3306 * New-style shared cache reference: for non-primary shared regions,
3307 * just include the ID of the shared cache we're attached to. Consumers
3308 * should use the following info from the task's ts_ss_flags as well:
3309 *
3310 * kTaskSharedRegionNone - task is not attached to a shared region
3311 * kTaskSharedRegionSystem - task is attached to the shared region
3312 * with kSharedCacheSystemPrimary set in sharedCacheFlags.
3313 * kTaskSharedRegionOther - task is attached to the shared region with
3314 * sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
3315 */
3316 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));
3317
3318 /*
3319 * For backwards compatibility; this should eventually be removed.
3320 *
3321 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3322 * naming of both the structure and fields for this use wasn't great. The
3323 * dyld_shared_cache_loadinfo structure has better names, but the same
3324 * layout and content as the original.
3325 *
3326 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3327 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3328 * entries; here, it's the slid first mapping, and we leave it that way
3329 * for backwards compatibility.
3330 */
3331 shared_cache_data.sharedCacheSlide = shared_cache_slide;
3332 kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
3333 shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
3334 shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
3335 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
3336
3337 error_exit:
3338 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3339 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
3340 }
3341
3342 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3343 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3344 }
3345
3346 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3347 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3348 }
3349
3350 return error;
3351 }
3352
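/*
 * Record image load info (UUID + load address) for the task. For user tasks
 * this is copied from dyld's all_image_infos (optionally faulting pages in),
 * with a fallback to the main binary's UUID from the proc structure; for the
 * kernel task it is the kernelcache or kernel UUID plus, when requested, the
 * loaded kext summaries. dyld compact info is captured alongside when
 * STACKSHOT_SAVE_DYLD_COMPACTINFO is set.
 */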
3353 static kern_return_t
3354 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
3355 {
3356 bool save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
3357 bool save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
3358 bool save_compactinfo_p = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
3359 bool should_fault = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
3360
3361 kern_return_t error = KERN_SUCCESS;
3362 mach_vm_address_t out_addr = 0;
3363
3364 mach_vm_address_t dyld_compactinfo_addr = 0;
3365 uint32_t dyld_compactinfo_size = 0;
3366
3367 uint32_t uuid_info_count = 0;
3368 mach_vm_address_t uuid_info_addr = 0;
3369 uint64_t uuid_info_timestamp = 0;
3370 #pragma unused(uuid_info_timestamp)
3371 kdp_fault_result_flags_t kdp_fault_results = 0;
3372
3373
3374 assert(task_snap_ss_flags != NULL);
3375
3376 int task_pid = pid_from_task(task);
3377 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
3378
3379 if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
3380 /* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
3381 if (task_64bit_addr) {
3382 struct user64_dyld_all_image_infos task_image_infos;
3383 if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3384 sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3385 uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
3386 uuid_info_addr = task_image_infos.uuidArray;
3387 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3388 uuid_info_timestamp = task_image_infos.timestamp;
3389 }
3390 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3391 dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3392 dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3393 }
3394
3395 }
3396 } else {
3397 struct user32_dyld_all_image_infos task_image_infos;
3398 if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
3399 sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
3400 uuid_info_count = task_image_infos.uuidArrayCount;
3401 uuid_info_addr = task_image_infos.uuidArray;
3402 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
3403 uuid_info_timestamp = task_image_infos.timestamp;
3404 }
3405 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
3406 dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
3407 dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
3408 }
3409 }
3410 }
3411
3412 /*
3413 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
3414 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
3415 * for this task.
3416 */
3417 if (!uuid_info_addr) {
3418 uuid_info_count = 0;
3419 }
3420
3421 if (!dyld_compactinfo_addr) {
3422 dyld_compactinfo_size = 0;
3423 }
3424
3425 }
3426
3427 if (have_pmap && task_pid == 0) {
3428 if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
3429 uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
3430 } else {
3431 uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
3432 }
3433 }
3434
3435 if (save_compactinfo_p && task_pid > 0) {
3436 if (dyld_compactinfo_size == 0) {
3437 *task_snap_ss_flags |= kTaskDyldCompactInfoNone;
3438 } else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
3439 *task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
3440 } else {
3441 kdp_fault_result_flags_t ci_kdp_fault_results = 0;
3442
3443 /* Open a compression window to avoid overflowing the stack */
3444 kcdata_compression_window_open(kcd);
3445 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
3446 dyld_compactinfo_size, &out_addr));
3447
3448 if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
3449 dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
3450 bzero((void *)out_addr, dyld_compactinfo_size);
3451 }
3452 if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3453 *task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
3454 }
3455
3456 if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3457 *task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
3458 }
3459
3460 if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3461 *task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
3462 }
3463
3464 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3465 }
3466 }
3467 if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
3468 uint32_t copied_uuid_count = 0;
3469 uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
3470 uint32_t uuid_info_array_size = 0;
3471
3472 /* Open a compression window to avoid overflowing the stack */
3473 kcdata_compression_window_open(kcd);
3474
3475 /* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
3476 if (uuid_info_count > 0) {
3477 uuid_info_array_size = uuid_info_count * uuid_info_size;
3478
3479 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3480 uuid_info_size, uuid_info_count, &out_addr));
3481
3482 if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
3483 bzero((void *)out_addr, uuid_info_array_size);
3484 } else {
3485 copied_uuid_count = uuid_info_count;
3486 }
3487 }
3488
3489 uuid_t binary_uuid;
3490 if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
3491 /* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
3492 if (uuid_info_array_size == 0) {
3493 /* We just need to store one UUID */
3494 uuid_info_array_size = uuid_info_size;
3495 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
3496 uuid_info_size, 1, &out_addr));
3497 }
3498
3499 if (task_64bit_addr) {
3500 struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
3501 uint64_t image_load_address = task->mach_header_vm_address;
3502
3503 kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3504 kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3505 } else {
3506 struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
3507 uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
3508
3509 kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
3510 kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
3511 }
3512 }
3513
3514 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3515 } else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
3516 uintptr_t image_load_address;
3517
3518 do {
3519 #if defined(__arm64__)
3520 if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
3521 struct dyld_uuid_info_64 kc_uuid = {0};
3522 kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3523 kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
3524 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
3525 break;
3526 }
3527 #endif /* defined(__arm64__) */
3528
3529 if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
3530 /* Kernel UUID not found or inaccessible */
3531 break;
3532 }
3533
3534 uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
3535 if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
3536 uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
3537 #if defined(__arm64__)
3538 kc_format_t primary_kc_type = KCFormatUnknown;
3539 if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
3540 /* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
3541 uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
3542 }
3543 #endif
3544 }
3545
3546 /*
3547 * The element count of the array can vary - avoid overflowing the
3548 * stack by opening a window.
3549 */
3550 kcdata_compression_window_open(kcd);
3551 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
3552 sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
3553 kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
3554
3555 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
3556 #if defined(__arm64__)
3557 if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3558 /* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3559 extern vm_offset_t segTEXTEXECB;
3560 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
3561 }
3562 #endif
3563 uuid_info_array[0].imageLoadAddress = image_load_address;
3564 kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
3565
3566 if (save_kextloadinfo_p &&
3567 _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
3568 _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
3569 gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
3570 uint32_t kexti;
3571 for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
3572 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
3573 #if defined(__arm64__)
3574 if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
3575 /* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
3576 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
3577 }
3578 #endif
3579 uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
3580 kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
3581 }
3582 }
3583 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3584 } while (0);
3585 }
3586
3587 error_exit:
3588 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
3589 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
3590 }
3591
3592 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
3593 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
3594 }
3595
3596 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
3597 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
3598 }
3599
3600 return error;
3601 }
3602
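/*
 * Emit per-task I/O statistics, skipping tasks whose counters are all zero.
 * Derived buckets (writes, non-paging, data) are computed as total minus the
 * explicitly tracked counterpart.
 */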
3603 static kern_return_t
3604 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
3605 {
3606 kern_return_t error = KERN_SUCCESS;
3607 mach_vm_address_t out_addr = 0;
3608
3609 /* I/O statistics, only if any counters are non-zero */
3610 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
3611 if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
3612 /* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
3613 kcdata_compression_window_open(kcd);
3614 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
3615 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
3616 _iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
3617 _iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
3618 _iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
3619 _iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
3620 _iostat->ss_paging_count = task->task_io_stats->paging.count;
3621 _iostat->ss_paging_size = task->task_io_stats->paging.size;
3622 _iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
3623 _iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
3624 _iostat->ss_metadata_count = task->task_io_stats->metadata.count;
3625 _iostat->ss_metadata_size = task->task_io_stats->metadata.size;
3626 _iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
3627 _iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
3628 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
3629 _iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
3630 _iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
3631 }
3632 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3633 }
3634
3635
3636 error_exit:
3637 return error;
3638 }
3639
3640 #if CONFIG_PERVASIVE_CPI
3641 static kern_return_t
3642 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
3643 {
3644 struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
3645 struct recount_usage usage = { 0 };
3646 struct recount_usage perf_only = { 0 };
3647 recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
3648 instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
3649 instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
3650 instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
3651 instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);
3652
3653 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
3654 }
3655 #endif /* CONFIG_PERVASIVE_CPI */
3656
3657 static kern_return_t
3658 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
3659 {
3660 struct stackshot_cpu_architecture cpu_architecture = {0};
3661 int32_t cputype;
3662 int32_t cpusubtype;
3663
3664 proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
3665 cpu_architecture.cputype = cputype;
3666 cpu_architecture.cpusubtype = cpusubtype;
3667
3668 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
3669 }
3670
3671 static kern_return_t
3672 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
3673 {
3674 struct stackshot_task_codesigning_info codesigning_info = {};
3675 void * bsdtask_info = NULL;
3676 uint32_t trust = 0;
3677 kern_return_t ret = 0;
3678 pmap_t pmap = get_task_pmap(task);
3679 uint64_t cs_auxiliary_info = 0;
3680 if (task != kernel_task) {
3681 bsdtask_info = get_bsdtask_info(task);
3682 codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
3683 ret = get_trust_level_kdp(pmap, &trust);
3684 if (ret != KERN_SUCCESS) {
3685 trust = KCDATA_INVALID_CS_TRUST_LEVEL;
3686 }
3687 codesigning_info.cs_trust_level = trust;
3688 cs_auxiliary_info = task_get_cs_auxiliary_info_kdp(task);
3689 } else {
3690 return KERN_SUCCESS;
3691 }
3692 ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
3693 if (ret != KERN_SUCCESS) {
3694 return ret;
3695 }
3696 return kcdata_push_data(kcd, TASK_CRASHINFO_CS_AUXILIARY_INFO, sizeof(cs_auxiliary_info), &cs_auxiliary_info);
3697 }
3698
3699 static kern_return_t
3700 kcdata_record_task_jit_address_range(kcdata_descriptor_t kcd, task_t task)
3701 {
3702 uint64_t jit_start_addr = 0;
3703 uint64_t jit_end_addr = 0;
3704 struct crashinfo_jit_address_range range = {};
3705 kern_return_t ret = 0;
3706 pmap_t pmap = get_task_pmap(task);
3707 if (task == kernel_task || NULL == pmap) {
3708 return KERN_SUCCESS;
3709 }
3710 ret = get_jit_address_range_kdp(pmap, (uintptr_t*)&jit_start_addr, (uintptr_t*)&jit_end_addr);
3711 if (KERN_SUCCESS == ret) {
3712 range.start_address = jit_start_addr;
3713 range.end_address = jit_end_addr;
3714 return kcdata_push_data(kcd, TASK_CRASHINFO_JIT_ADDRESS_RANGE, sizeof(struct crashinfo_jit_address_range), &range);
3715 } else {
3716 return KERN_SUCCESS;
3717 }
3718 }
3719
3720 #if CONFIG_TASK_SUSPEND_STATS
3721 static kern_return_t
3722 kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
3723 {
3724 kern_return_t ret = KERN_SUCCESS;
3725 struct stackshot_suspension_info suspension_info = {};
3726 task_suspend_stats_data_t suspend_stats;
3727 task_suspend_source_array_t suspend_sources;
3728 struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
3729 int i;
3730
3731 if (task == kernel_task) {
3732 return KERN_SUCCESS;
3733 }
3734
3735 ret = task_get_suspend_stats_kdp(task, &suspend_stats);
3736 if (ret != KERN_SUCCESS) {
3737 return ret;
3738 }
3739
3740 suspension_info.tss_count = suspend_stats.tss_count;
3741 suspension_info.tss_duration = suspend_stats.tss_duration;
3742 suspension_info.tss_last_end = suspend_stats.tss_last_end;
3743 suspension_info.tss_last_start = suspend_stats.tss_last_start;
3744 ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
3745 if (ret != KERN_SUCCESS) {
3746 return ret;
3747 }
3748
3749 ret = task_get_suspend_sources_kdp(task, suspend_sources);
3750 if (ret != KERN_SUCCESS) {
3751 return ret;
3752 }
3753
3754 for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
3755 suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
3756 strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname, sizeof(suspend_sources[i].tss_procname));
3757 suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
3758 suspension_sources[i].tss_time = suspend_sources[i].tss_time;
3759 }
3760 return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
3761 }
3762 #endif /* CONFIG_TASK_SUSPEND_STATS */
3763
3764 static kern_return_t
3765 kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
3766 {
3767 kern_return_t error = KERN_SUCCESS;
3768 mach_vm_address_t out_addr = 0;
3769 struct transitioning_task_snapshot * cur_tsnap = NULL;
3770
3771 int task_pid = pid_from_task(task);
3772 /* Is returning -1 ok for a terminating task? */
3773 uint64_t task_uniqueid = get_task_uniqueid(task);
3774
3775 if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3776 /*
3777 * if this task is in transit from another one (exec in progress), show
3778 * the pid as negative
3779 */
3780 task_pid = 0 - task_pid;
3781 }
3782
3783 /* the transitioning_task_snapshot struct is large - avoid overflowing the stack */
3784 kcdata_compression_window_open(kcd);
3785 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
3786 cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
3787 bzero(cur_tsnap, sizeof(*cur_tsnap));
3788
3789 cur_tsnap->tts_unique_pid = task_uniqueid;
3790 cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
3791 cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
3792 cur_tsnap->tts_transition_type = transition_type;
3793 cur_tsnap->tts_pid = task_pid;
3794
3795 /* Add the BSD process identifiers */
3796 if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
3797 proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
3798 } else {
3799 cur_tsnap->tts_p_comm[0] = '\0';
3800 }
3801
3802 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3803
3804 error_exit:
3805 return error;
3806 }
3807
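/*
 * Emit the full (non-delta) task snapshot: the fixed task_snapshot_v2 record
 * is filled inside a compression window to keep it off the stack, followed by
 * optional per-task data selected by trace_flags (jetsam coalition id, ASID,
 * page tables, iostats, instruction/cycle counts) plus CPU architecture,
 * codesigning, JIT range and, where configured, suspension info.
 */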
3808 static kern_return_t
3809 #if STACKSHOT_COLLECTS_LATENCY_INFO
3810 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
3811 #else
3812 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3813 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3814 {
3815 bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3816 bool collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
3817 #if CONFIG_PERVASIVE_CPI
3818 bool collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3819 #endif /* CONFIG_PERVASIVE_CPI */
3820 #if __arm64__
3821 bool collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
3822 #endif
3823 bool collect_pagetables = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
3824
3825
3826 kern_return_t error = KERN_SUCCESS;
3827 mach_vm_address_t out_addr = 0;
3828 struct task_snapshot_v2 * cur_tsnap = NULL;
3829 #if STACKSHOT_COLLECTS_LATENCY_INFO
3830 latency_info->cur_tsnap_latency = mach_absolute_time();
3831 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3832
3833 int task_pid = pid_from_task(task);
3834 uint64_t task_uniqueid = get_task_uniqueid(task);
3835 void *bsd_info = get_bsdtask_info(task);
3836 uint64_t proc_starttime_secs = 0;
3837
3838 if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
3839 /*
3840 * if this task is in transit from another one (exec in progress), show
3841 * the pid as negative
3842 */
3843 task_pid = 0 - task_pid;
3844 }
3845
3846 /* the task_snapshot_v2 struct is large - avoid overflowing the stack */
3847 kcdata_compression_window_open(kcd);
3848 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
3849 cur_tsnap = (struct task_snapshot_v2 *)out_addr;
3850 bzero(cur_tsnap, sizeof(*cur_tsnap));
3851
3852 cur_tsnap->ts_unique_pid = task_uniqueid;
3853 cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
3854 cur_tsnap->ts_ss_flags |= task_snap_ss_flags;
3855
3856 struct recount_usage term_usage = { 0 };
3857 recount_task_terminated_usage(task, &term_usage);
3858 struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
3859 cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
3860 cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;
3861
3862 proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
3863 cur_tsnap->ts_p_start_sec = proc_starttime_secs;
3864 cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
3865 cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
3866 cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
3867 cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
3868
3869 cur_tsnap->ts_suspend_count = task->suspend_count;
3870 cur_tsnap->ts_faults = counter_load(&task->faults);
3871 cur_tsnap->ts_pageins = counter_load(&task->pageins);
3872 cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
3873 cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
3874 LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
3875 cur_tsnap->ts_pid = task_pid;
3876
3877 /* Add the BSD process identifiers */
3878 if (task_pid != -1 && bsd_info != NULL) {
3879 proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
3880 } else {
3881 cur_tsnap->ts_p_comm[0] = '\0';
3882 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
3883 if (task->task_imp_base != NULL) {
3884 kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
3885 MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
3886 }
3887 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
3888 }
3889
3890 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3891
3892 #if CONFIG_COALITIONS
3893 if (task_pid != -1 && bsd_info != NULL &&
3894 (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
3895 /*
3896 * The jetsam coalition ID is always saved, even if
3897 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
3898 */
3899 uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
3900 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
3901 }
3902 #endif /* CONFIG_COALITIONS */
3903
3904 #if __arm64__
3905 if (collect_asid && have_pmap) {
3906 uint32_t asid = PMAP_VASID(task->map->pmap);
3907 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
3908 }
3909 #endif
3910
3911 #if STACKSHOT_COLLECTS_LATENCY_INFO
3912 latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
3913 latency_info->pmap_latency = mach_absolute_time();
3914 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3915
3916 if (collect_pagetables && have_pmap) {
3917 #if SCHED_HYGIENE_DEBUG
3918 // pagetable dumps can be large; reset the interrupt timeout to avoid a panic
3919 ml_spin_debug_clear_self();
3920 #endif
3921 assert(stackshot_ctx.sc_is_singlethreaded);
3922 size_t bytes_dumped = 0;
3923 error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stackshot_args.pagetable_mask, &bytes_dumped);
3924 if (error != KERN_SUCCESS) {
3925 goto error_exit;
3926 } else {
3927 /* Variable size array - better not have it on the stack. */
3928 kcdata_compression_window_open(kcd);
3929 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
3930 sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
3931 kcd_exit_on_error(kcdata_compression_window_close(kcd));
3932 }
3933 }
3934
3935 #if STACKSHOT_COLLECTS_LATENCY_INFO
3936 latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
3937 latency_info->bsd_proc_ids_latency = mach_absolute_time();
3938 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3939
3940 #if STACKSHOT_COLLECTS_LATENCY_INFO
3941 latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
3942 latency_info->end_latency = mach_absolute_time();
3943 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3944
3945 if (collect_iostats) {
3946 kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
3947 }
3948
3949 #if CONFIG_PERVASIVE_CPI
3950 if (collect_instrs_cycles) {
3951 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
3952 }
3953 #endif /* CONFIG_PERVASIVE_CPI */
3954
3955 kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
3956 kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));
3957 kcd_exit_on_error(kcdata_record_task_jit_address_range(kcd, task));
3958
3959 #if CONFIG_TASK_SUSPEND_STATS
3960 kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
3961 #endif /* CONFIG_TASK_SUSPEND_STATS */
3962
3963 #if STACKSHOT_COLLECTS_LATENCY_INFO
3964 latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
3965 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
3966
3967 error_exit:
3968 return error;
3969 }
3970
3971 static kern_return_t
3972 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
3973 {
3974 #if !CONFIG_PERVASIVE_CPI
3975 #pragma unused(trace_flags)
3976 #endif /* !CONFIG_PERVASIVE_CPI */
3977 kern_return_t error = KERN_SUCCESS;
3978 struct task_delta_snapshot_v2 * cur_tsnap = NULL;
3979 mach_vm_address_t out_addr = 0;
3980 (void) trace_flags;
3981 #if __arm64__
3982 boolean_t collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
3983 #endif
3984 #if CONFIG_PERVASIVE_CPI
3985 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
3986 #endif /* CONFIG_PERVASIVE_CPI */
3987
3988 uint64_t task_uniqueid = get_task_uniqueid(task);
3989
3990 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
3991
3992 cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
3993
3994 cur_tsnap->tds_unique_pid = task_uniqueid;
3995 cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
3996 cur_tsnap->tds_ss_flags |= task_snap_ss_flags;
3997
3998 struct recount_usage usage = { 0 };
3999 recount_task_terminated_usage(task, &usage);
4000 struct recount_times_mach term_times = recount_usage_times_mach(&usage);
4001
4002 cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
4003 cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;
4004
4005 cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
4006
4007 cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
4008 cur_tsnap->tds_suspend_count = task->suspend_count;
4009 cur_tsnap->tds_faults = counter_load(&task->faults);
4010 cur_tsnap->tds_pageins = counter_load(&task->pageins);
4011 cur_tsnap->tds_cow_faults = counter_load(&task->cow_faults);
4012 cur_tsnap->tds_was_throttled = (uint32_t)proc_was_throttled_from_task(task);
4013 cur_tsnap->tds_did_throttle = (uint32_t)proc_did_throttle_from_task(task);
4014 cur_tsnap->tds_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
4015 ? LATENCY_QOS_TIER_UNSPECIFIED
4016 : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
4017
4018 #if __arm64__
4019 if (collect_asid && have_pmap) {
4020 uint32_t asid = PMAP_VASID(task->map->pmap);
4021 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
4022 kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
4023 }
4024 #endif
4025
4026 #if CONFIG_PERVASIVE_CPI
4027 if (collect_instrs_cycles) {
4028 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
4029 }
4030 #endif /* CONFIG_PERVASIVE_CPI */
4031
4032 error_exit:
4033 return error;
4034 }
4035
4036 static kern_return_t
4037 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
4038 {
4039 kern_return_t error = KERN_SUCCESS;
4040 mach_vm_address_t out_addr = 0;
4041
4042 /* I/O Statistics */
4043 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
4044 if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
4045 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
4046 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
4047 _iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
4048 _iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
4049 _iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
4050 _iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
4051 _iostat->ss_paging_count = thread->thread_io_stats->paging.count;
4052 _iostat->ss_paging_size = thread->thread_io_stats->paging.size;
4053 _iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
4054 _iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
4055 _iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
4056 _iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
4057 _iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
4058 _iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
4059 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
4060 _iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
4061 _iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
4062 }
4063 }
4064
4065 error_exit:
4066 return error;
4067 }
4068
4069 bool
4070 machine_trace_thread_validate_kva(vm_offset_t addr)
4071 {
4072 return _stackshot_validate_kva(addr, sizeof(uintptr_t));
4073 }
4074
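/*
 * Copy callback used when unwinding user stacks: translates the user VA to a
 * physical page (optionally taking a fault), caches the most recent page
 * translation in the context, and copies through the physical aperture so no
 * user mapping has to be touched. Fault outcomes are folded into sbc_flags as
 * kThread*BT hints for the owning thread snapshot.
 */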
4075 struct _stackshot_backtrace_context {
4076 vm_map_t sbc_map;
4077 vm_offset_t sbc_prev_page;
4078 vm_offset_t sbc_prev_kva;
4079 uint32_t sbc_flags;
4080 bool sbc_allow_faulting;
4081 };
4082
4083 static errno_t
4084 _stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
4085 {
4086 struct _stackshot_backtrace_context *ctx = vctx;
4087 size_t map_page_mask = 0;
4088 size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
4089 &map_page_mask);
4090 assert(size < map_page_size);
4091 if (src & (size - 1)) {
4092 // The source should be aligned to the size passed in, like a stack
4093 // frame or word.
4094 return EINVAL;
4095 }
4096
4097 vm_offset_t src_page = src & ~map_page_mask;
4098 vm_offset_t src_kva = 0;
4099
4100 if (src_page != ctx->sbc_prev_page) {
4101 uint32_t res = 0;
4102 uint32_t flags = 0;
4103 vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
4104 ctx->sbc_allow_faulting, &res);
4105
4106 flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
4107 flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
4108 flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
4109 ctx->sbc_flags |= flags;
4110 if (src_pa == 0) {
4111 return EFAULT;
4112 }
4113
4114 src_kva = phystokv(src_pa);
4115 ctx->sbc_prev_page = src_page;
4116 ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
4117 } else {
4118 src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
4119 }
4120
4121 #if KASAN
4122 /*
4123 * KASan does not monitor accesses to userspace pages. Therefore, it is
4124 * pointless to maintain a shadow map for them. Instead, they are all
4125 * mapped to a single, always valid shadow map page. This approach saves
4126 * a considerable amount of shadow map pages which are limited and
4127 * precious.
4128 */
4129 kasan_notify_address_nopoison(src_kva, size);
4130 #endif
4131 memcpy(dst, (const void *)src_kva, size);
4132
4133 return 0;
4134 }
4135
4136 static kern_return_t
4137 kcdata_record_thread_snapshot(kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
4138 {
4139 boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
4140 boolean_t active_kthreads_only_p = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4141 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4142 boolean_t collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
4143 #if CONFIG_PERVASIVE_CPI
4144 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
4145 #endif /* CONFIG_PERVASIVE_CPI */
4146 kern_return_t error = KERN_SUCCESS;
4147
4148 #if STACKSHOT_COLLECTS_LATENCY_INFO
4149 struct stackshot_latency_thread latency_info;
4150 latency_info.cur_thsnap1_latency = mach_absolute_time();
4151 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4152
4153 mach_vm_address_t out_addr = 0;
4154 int saved_count = 0;
4155
4156 struct thread_snapshot_v4 * cur_thread_snap = NULL;
4157 char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
4158
4159 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
4160 cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
4161
4162 /* Populate the thread snapshot header */
4163 cur_thread_snap->ths_ss_flags = 0;
4164 cur_thread_snap->ths_thread_id = thread_tid(thread);
4165 cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
4166 cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
4167 cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
4168
4169 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4170 cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4171 } else {
4172 cur_thread_snap->ths_voucher_identifier = 0;
4173 }
4174
4175 #if STACKSHOT_COLLECTS_LATENCY_INFO
4176 latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
4177 latency_info.dispatch_serial_latency = mach_absolute_time();
4178 latency_info.dispatch_label_latency = 0;
4179 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4180
4181 cur_thread_snap->ths_dqserialnum = 0;
4182 if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
4183 uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
4184 if (dqkeyaddr != 0) {
4185 uint64_t dqaddr = 0;
4186 boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
4187 if (copyin_ok && dqaddr != 0) {
4188 uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
4189 uint64_t dqserialnum = 0;
4190 copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
4191 if (copyin_ok) {
4192 cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
4193 cur_thread_snap->ths_dqserialnum = dqserialnum;
4194 }
4195
4196 #if STACKSHOT_COLLECTS_LATENCY_INFO
4197 latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
4198 latency_info.dispatch_label_latency = mach_absolute_time();
4199 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4200
4201 /* try copying in the queue label */
4202 uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
4203 if (label_offs) {
4204 uint64_t dqlabeladdr = dqaddr + label_offs;
4205 uint64_t actual_dqlabeladdr = 0;
4206
4207 copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
4208 if (copyin_ok && actual_dqlabeladdr != 0) {
4209 char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
4210 int len;
4211
4212 bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
4213 len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
4214 if (len > 0) {
4215 mach_vm_address_t label_addr = 0;
4216 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
4217 kdp_strlcpy((char*)label_addr, &label_buf[0], len);
4218 }
4219 }
4220 }
4221 #if STACKSHOT_COLLECTS_LATENCY_INFO
4222 latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
4223 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4224 }
4225 }
4226 }
4227
4228 #if STACKSHOT_COLLECTS_LATENCY_INFO
4229 if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
4230 latency_info.dispatch_serial_latency = 0;
4231 }
4232 latency_info.cur_thsnap2_latency = mach_absolute_time();
4233 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4234
4235 struct recount_times_mach times = recount_thread_times(thread);
4236 cur_thread_snap->ths_user_time = times.rtm_user;
4237 cur_thread_snap->ths_sys_time = times.rtm_system;
4238
4239 if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
4240 cur_thread_snap->ths_ss_flags |= kThreadMain;
4241 }
4242 if (thread->effective_policy.thep_darwinbg) {
4243 cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
4244 }
4245 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4246 cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
4247 }
4248 if (thread->suspend_count > 0) {
4249 cur_thread_snap->ths_ss_flags |= kThreadSuspended;
4250 }
4251 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4252 cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
4253 }
4254 #if CONFIG_EXCLAVES
4255 /* save exclave thread for later collection */
4256 if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !stackshot_ctx.sc_panic_stackshot) {
4257 /* certain threads, like the collector, must never be inspected */
4258 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
4259 uint32_t ctid_index = os_atomic_inc_orig(&stackshot_exclave_inspect_ctid_count, acq_rel);
4260 if (ctid_index < stackshot_exclave_inspect_ctid_capacity) {
4261 stackshot_exclave_inspect_ctids[ctid_index] = thread_get_ctid(thread);
4262 } else {
4263 os_atomic_store(&stackshot_exclave_inspect_ctid_count, stackshot_exclave_inspect_ctid_capacity, release);
4264 }
4265 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
4266 panic("stackshot: trying to inspect already-queued thread");
4267 }
4268 }
4269 }
4270 #endif /* CONFIG_EXCLAVES */
4271 if (thread_on_core) {
4272 cur_thread_snap->ths_ss_flags |= kThreadOnCore;
4273 }
4274 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4275 cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
4276 }
4277
4278 /* make sure state flags defined in kcdata.h still match internal flags */
4279 static_assert(SS_TH_WAIT == TH_WAIT);
4280 static_assert(SS_TH_SUSP == TH_SUSP);
4281 static_assert(SS_TH_RUN == TH_RUN);
4282 static_assert(SS_TH_UNINT == TH_UNINT);
4283 static_assert(SS_TH_TERMINATE == TH_TERMINATE);
4284 static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
4285 static_assert(SS_TH_IDLE == TH_IDLE);
4286
4287 cur_thread_snap->ths_last_run_time = thread->last_run_time;
4288 cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
4289 cur_thread_snap->ths_state = thread->state;
4290 cur_thread_snap->ths_sched_flags = thread->sched_flags;
4291 cur_thread_snap->ths_base_priority = thread->base_pri;
4292 cur_thread_snap->ths_sched_priority = thread->sched_pri;
4293 cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
4294 cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
4295 cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4296 thread->requested_policy.thrp_qos_workq_override);
4297 cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4298 cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
4299
4300 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4301 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4302 cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4303 cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4304
4305 #if STACKSHOT_COLLECTS_LATENCY_INFO
4306 latency_info.cur_thsnap2_latency = mach_absolute_time() - latency_info.cur_thsnap2_latency;
4307 latency_info.thread_name_latency = mach_absolute_time();
4308 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4309
4310 /* if the thread has a name, add it to the buffer */
4311 cur_thread_name[0] = '\0';
4312 proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
4313 if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
4314 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
4315 kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
4316 }
4317
4318 #if STACKSHOT_COLLECTS_LATENCY_INFO
4319 latency_info.thread_name_latency = mach_absolute_time() - latency_info.thread_name_latency;
4320 latency_info.sur_times_latency = mach_absolute_time();
4321 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4322
4323 /* record system, user, and runnable times */
4324 time_value_t runnable_time;
4325 thread_read_times(thread, NULL, NULL, &runnable_time);
4326 clock_sec_t user_sec = 0, system_sec = 0;
4327 clock_usec_t user_usec = 0, system_usec = 0;
4328 absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
4329 absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);
4330
4331 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
4332 struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
4333 *stackshot_cpu_times = (struct stackshot_cpu_times_v2){
4334 .user_usec = user_sec * USEC_PER_SEC + user_usec,
4335 .system_usec = system_sec * USEC_PER_SEC + system_usec,
4336 .runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
4337 };
4338
4339 #if STACKSHOT_COLLECTS_LATENCY_INFO
4340 latency_info.sur_times_latency = mach_absolute_time() - latency_info.sur_times_latency;
4341 latency_info.user_stack_latency = mach_absolute_time();
4342 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4343
4344 /* Trace user stack, if any */
4345 if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
4346 uint32_t user_ths_ss_flags = 0;
4347
4348 /*
4349 * We don't know how big the stacktrace will be, so read it into our
4350 * per-cpu buffer, then copy it to the kcdata.
4351 */
4352 struct _stackshot_backtrace_context ctx = {
4353 .sbc_map = task->map,
4354 .sbc_allow_faulting = stackshot_ctx.sc_enable_faulting,
4355 .sbc_prev_page = -1,
4356 .sbc_prev_kva = -1,
4357 };
4358 struct backtrace_control ctl = {
4359 .btc_user_thread = thread,
4360 .btc_user_copy = _stackshot_backtrace_copy,
4361 .btc_user_copy_context = &ctx,
4362 };
4363 struct backtrace_user_info info = BTUINFO_INIT;
4364
4365 saved_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4366 &info);
4367 if (saved_count > 0) {
4368 #if __LP64__
4369 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
4370 #else // __LP64__
4371 #define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
4372 #endif // !__LP64__
4373 /* Now, copy the stacktrace into kcdata. */
4374 kcd_exit_on_error(kcdata_push_array(kcd, STACKLR_WORDS, sizeof(uintptr_t),
4375 saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4376 if (info.btui_info & BTI_64_BIT) {
4377 user_ths_ss_flags |= kUser64_p;
4378 }
4379 if ((info.btui_info & BTI_TRUNCATED) ||
4380 (ctx.sbc_flags & kThreadTruncatedBT)) {
4381 user_ths_ss_flags |= kThreadTruncatedBT;
4382 user_ths_ss_flags |= kThreadTruncUserBT;
4383 }
4384 user_ths_ss_flags |= ctx.sbc_flags;
4385 ctx.sbc_flags = 0;
4386 #if __LP64__
4387 /* We only support async stacks on 64-bit kernels */
4388 if (info.btui_async_frame_addr != 0) {
4389 uint32_t async_start_offset = info.btui_async_start_index;
4390 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
4391 sizeof(async_start_offset), &async_start_offset));
4392 ctl.btc_frame_addr = info.btui_async_frame_addr;
4393 ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
4394 info = BTUINFO_INIT;
4395 unsigned int async_count = backtrace_user(stackshot_cpu_ctx.scc_stack_buffer, MAX_FRAMES, &ctl,
4396 &info);
4397 if (async_count > 0) {
4398 kcd_exit_on_error(kcdata_push_array(kcd, STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64,
4399 sizeof(uintptr_t), async_count, stackshot_cpu_ctx.scc_stack_buffer));
4400 if ((info.btui_info & BTI_TRUNCATED) ||
4401 (ctx.sbc_flags & kThreadTruncatedBT)) {
4402 user_ths_ss_flags |= kThreadTruncatedBT;
4403 user_ths_ss_flags |= kThreadTruncUserAsyncBT;
4404 }
4405 user_ths_ss_flags |= ctx.sbc_flags;
4406 }
4407 }
4408 #endif /* __LP64__ */
4409 }
4410 if (user_ths_ss_flags != 0) {
4411 cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
4412 }
4413 }
4414
4415 #if STACKSHOT_COLLECTS_LATENCY_INFO
4416 latency_info.user_stack_latency = mach_absolute_time() - latency_info.user_stack_latency;
4417 latency_info.kernel_stack_latency = mach_absolute_time();
4418 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4419
4420 /* Call through to the machine-specific trace routines.
4421 * Frames are added past the snapshot header.
4422 */
4423 if (thread->kernel_stack != 0) {
4424 uint32_t kern_ths_ss_flags = 0;
4425 #if defined(__LP64__)
4426 uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
4427 extern int machine_trace_thread64(thread_t thread, char *tracepos,
4428 char *tracebound, int nframes, uint32_t *thread_trace_flags);
4429 saved_count = machine_trace_thread64(
4430 #else
4431 uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
4432 extern int machine_trace_thread(thread_t thread, char *tracepos,
4433 char *tracebound, int nframes, uint32_t *thread_trace_flags);
4434 saved_count = machine_trace_thread(
4435 #endif
4436 thread, (char*) stackshot_cpu_ctx.scc_stack_buffer,
4437 (char *) (stackshot_cpu_ctx.scc_stack_buffer + MAX_FRAMES), MAX_FRAMES,
4438 &kern_ths_ss_flags);
4439 if (saved_count > 0) {
4440 int frame_size = sizeof(uintptr_t);
4441 #if defined(__LP64__)
4442 cur_thread_snap->ths_ss_flags |= kKernel64_p;
4443 #endif
4444 #if CONFIG_EXCLAVES
4445 if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
4446 struct thread_exclaves_info info = { 0 };
4447
4448 info.tei_flags = kExclaveRPCActive;
4449 if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
4450 info.tei_flags |= kExclaveSchedulerRequest;
4451 }
4452 if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
4453 info.tei_flags |= kExclaveUpcallActive;
4454 }
4455 info.tei_scid = thread->th_exclaves_ipc_ctx.scid;
4456 info.tei_thread_offset = exclaves_stack_offset(stackshot_cpu_ctx.scc_stack_buffer, saved_count / frame_size, false);
4457
4458 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
4459 }
4460 #endif /* CONFIG_EXCLAVES */
4461 kcd_exit_on_error(kcdata_push_array(kcd, stack_kcdata_type,
4462 frame_size, saved_count / frame_size, stackshot_cpu_ctx.scc_stack_buffer));
4463 }
4464 if (kern_ths_ss_flags & kThreadTruncatedBT) {
4465 kern_ths_ss_flags |= kThreadTruncKernBT;
4466 }
4467 if (kern_ths_ss_flags != 0) {
4468 cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
4469 }
4470 }
4471
4472 #if STACKSHOT_COLLECTS_LATENCY_INFO
4473 latency_info.kernel_stack_latency = mach_absolute_time() - latency_info.kernel_stack_latency;
4474 latency_info.misc_latency = mach_absolute_time();
4475 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4476
4477 #if CONFIG_THREAD_GROUPS
4478 if (trace_flags & STACKSHOT_THREAD_GROUP) {
4479 uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
4480 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
4481 kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
4482 }
4483 #endif /* CONFIG_THREAD_GROUPS */
4484
4485 if (collect_iostats) {
4486 kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
4487 }
4488
4489 #if CONFIG_PERVASIVE_CPI
4490 if (collect_instrs_cycles) {
4491 struct recount_usage usage = { 0 };
4492 recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
4493 &usage);
4494
4495 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
4496 struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
4497 instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
4498 instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
4499 }
4500 #endif /* CONFIG_PERVASIVE_CPI */
4501
4502 #if STACKSHOT_COLLECTS_LATENCY_INFO
4503 latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4504 if (collect_latency_info) {
4505 kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
4506 }
4507 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4508
4509 error_exit:
4510 return error;
4511 }
4512
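/*
 * Record the abbreviated ("delta") form of a thread snapshot. Only fields
 * worth refreshing are captured (scheduling state, priorities, QoS,
 * requested/effective policy); thread names, backtraces, and CPU times are
 * not re-recorded for threads that have not run since the previous full
 * stackshot.
 */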
4513 static int
4514 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
4515 {
4516 cur_thread_snap->tds_thread_id = thread_tid(thread);
4517 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
4518 cur_thread_snap->tds_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
4519 } else {
4520 cur_thread_snap->tds_voucher_identifier = 0;
4521 }
4522
4523 cur_thread_snap->tds_ss_flags = 0;
4524 if (thread->effective_policy.thep_darwinbg) {
4525 cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
4526 }
4527 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
4528 cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
4529 }
4530 if (thread->suspend_count > 0) {
4531 cur_thread_snap->tds_ss_flags |= kThreadSuspended;
4532 }
4533 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
4534 cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
4535 }
4536 if (thread_on_core) {
4537 cur_thread_snap->tds_ss_flags |= kThreadOnCore;
4538 }
4539 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
4540 cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
4541 }
4542
4543 cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
4544 cur_thread_snap->tds_state = thread->state;
4545 cur_thread_snap->tds_sched_flags = thread->sched_flags;
4546 cur_thread_snap->tds_base_priority = thread->base_pri;
4547 cur_thread_snap->tds_sched_priority = thread->sched_pri;
4548 cur_thread_snap->tds_eqos = thread->effective_policy.thep_qos;
4549 cur_thread_snap->tds_rqos = thread->requested_policy.thrp_qos;
4550 cur_thread_snap->tds_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
4551 thread->requested_policy.thrp_qos_workq_override);
4552 cur_thread_snap->tds_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
4553
4554 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
4555 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
4556 cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
4557 cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
4558
4559 return 0;
4560 }
4561
4562 /*
4563 * Why 12? 12 strikes a decent balance between allocating a large array on
4564 * the stack and having large kcdata item overheads for recording nonrunnable
4565 * tasks.
4566 */
4567 #define UNIQUEIDSPERFLUSH 12
4568
4569 struct saved_uniqueids {
4570 uint64_t ids[UNIQUEIDSPERFLUSH];
4571 unsigned count;
4572 };
4573
4574 enum thread_classification {
4575 tc_full_snapshot, /* take a full snapshot */
4576 tc_delta_snapshot, /* take a delta snapshot */
4577 };
4578
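/*
 * Decide whether a thread needs a full snapshot or only a delta record.
 * A thread whose last processor is currently running it is treated as
 * on-core (idle threads always are, since their state can change while this
 * runs) and always gets a full snapshot. For delta stackshots, a thread that
 * has not run since stackshot_args.since_timestamp only needs a delta
 * record; everything else gets a full snapshot.
 */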
4579 static enum thread_classification
4580 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
4581 {
4582 processor_t last_processor = thread->last_processor;
4583
4584 boolean_t thread_on_core = FALSE;
4585 if (last_processor != PROCESSOR_NULL) {
4586 /* Idle threads are always treated as on-core, since the processor state can change while they are running. */
4587 thread_on_core = (thread == last_processor->idle_thread) ||
4588 (last_processor->state == PROCESSOR_RUNNING &&
4589 last_processor->active_thread == thread);
4590 }
4591
4592 *thread_on_core_p = thread_on_core;
4593
4594 /* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
4595 * previous full stackshot */
4596 if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stackshot_args.since_timestamp)) {
4597 return tc_full_snapshot;
4598 } else {
4599 return tc_delta_snapshot;
4600 }
4601 }
4602
4603
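/*
 * Record a single task into the stackshot kcdata stream: shared cache and
 * UUID info when a full snapshot is warranted, the task (or
 * transitioning-task) snapshot, per-thread full or delta snapshots,
 * wait/turnstile owner info, and, if requested, the list of pids donating
 * importance to the task. In the parallel path this is called by worker CPUs
 * for items pulled off the stackshot work queues.
 */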
4604 static kern_return_t
4605 kdp_stackshot_record_task(task_t task)
4606 {
4607 boolean_t active_kthreads_only_p = ((stackshot_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
4608 boolean_t save_donating_pids_p = ((stackshot_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
4609 boolean_t collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4610 boolean_t save_owner_info = ((stackshot_flags & STACKSHOT_THREAD_WAITINFO) != 0);
4611 boolean_t include_drivers = ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);
4612
4613 kern_return_t error = KERN_SUCCESS;
4614 mach_vm_address_t out_addr = 0;
4615 int saved_count = 0;
4616
4617 int task_pid = 0;
4618 uint64_t task_uniqueid = 0;
4619 int num_delta_thread_snapshots = 0;
4620 int num_waitinfo_threads = 0;
4621 int num_turnstileinfo_threads = 0;
4622
4623 uint64_t task_start_abstime = 0;
4624 boolean_t have_map = FALSE, have_pmap = FALSE;
4625 boolean_t some_thread_ran = FALSE;
4626 unaligned_u64 task_snap_ss_flags = 0;
4627 #if STACKSHOT_COLLECTS_LATENCY_INFO
4628 struct stackshot_latency_task latency_info;
4629 latency_info.setup_latency = mach_absolute_time();
4630 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4631
4632 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4633 uint64_t task_begin_cpu_cycle_count = 0;
4634 if (!stackshot_ctx.sc_panic_stackshot) {
4635 task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
4636 }
4637 #endif
4638
4639 if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
4640 error = KERN_FAILURE;
4641 goto error_exit;
4642 }
4643
4644 void *bsd_info = get_bsdtask_info(task);
4645 boolean_t task_in_teardown = (bsd_info == NULL) || proc_in_teardown(bsd_info); // has P_LPEXIT set during proc_exit()
4646 boolean_t task_in_transition = task_in_teardown; // here we can add other types of transition.
4647 uint32_t container_type = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
4648 uint32_t transition_type = (task_in_teardown) ? kTaskIsTerminated : 0;
4649
4650 if (task_in_transition) {
4651 collect_delta_stackshot = FALSE;
4652 }
4653
4654 have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
4655 have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
4656
4657 task_pid = pid_from_task(task);
4658 /* Is returning -1 acceptable for a terminating task? */
4659 task_uniqueid = get_task_uniqueid(task);
4660
4661 if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
4662 /*
4663 * Not interested in terminated tasks without threads.
4664 */
4665 if (queue_empty(&task->threads) || task_pid == -1) {
4666 return KERN_SUCCESS;
4667 }
4668 }
4669
4670 /* All PIDs should have the MSB unset */
4671 assert((task_pid & (1ULL << 31)) == 0);
4672
4673 #if STACKSHOT_COLLECTS_LATENCY_INFO
4674 latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
4675 latency_info.task_uniqueid = task_uniqueid;
4676 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4677
4678 /* Trace everything, unless a process was specified. Add in driver tasks if requested. */
4679 if ((stackshot_args.pid == -1) || (stackshot_args.pid == task_pid) || (include_drivers && task_is_driver(task))) {
4680 #if STACKSHOT_COLLECTS_LATENCY_INFO
4681 stackshot_cpu_latency.tasks_processed++;
4682 #endif
4683
4684 /* add task snapshot marker */
4685 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4686 container_type, task_uniqueid));
4687
4688 if (collect_delta_stackshot) {
4689 /*
4690 * For delta stackshots we need to know if a thread from this task has run since the
4691 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
4692 */
4693 thread_t thread = THREAD_NULL;
4694 queue_iterate(&task->threads, thread, thread_t, task_threads)
4695 {
4696 if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4697 error = KERN_FAILURE;
4698 goto error_exit;
4699 }
4700
4701 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4702 continue;
4703 }
4704
4705 boolean_t thread_on_core;
4706 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4707
4708 switch (thread_classification) {
4709 case tc_full_snapshot:
4710 some_thread_ran = TRUE;
4711 break;
4712 case tc_delta_snapshot:
4713 num_delta_thread_snapshots++;
4714 break;
4715 }
4716 }
4717 }
4718
4719 if (collect_delta_stackshot) {
4720 proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
4721 }
4722
4723 /* Next record any relevant UUID info and store the task snapshot */
4724 if (task_in_transition ||
4725 !collect_delta_stackshot ||
4726 (task_start_abstime == 0) ||
4727 (task_start_abstime > stackshot_args.since_timestamp) ||
4728 some_thread_ran) {
4729 /*
4730 * Collect full task information in these scenarios:
4731 *
4732 * 1) a full stackshot or the task is in transition
4733 * 2) a delta stackshot where the task started after the previous full stackshot
4734 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
4735 *
4736 * because the task may have exec'ed, changing its name, architecture, load info, etc
4737 */
4738
4739 kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
4740 kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, stackshot_flags, have_pmap, &task_snap_ss_flags));
4741 #if STACKSHOT_COLLECTS_LATENCY_INFO
4742 if (!task_in_transition) {
4743 kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags, &latency_info));
4744 } else {
4745 kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4746 }
4747 #else
4748 if (!task_in_transition) {
4749 kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4750 } else {
4751 kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
4752 }
4753 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4754 } else {
4755 kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, stackshot_flags, have_pmap, task_snap_ss_flags));
4756 }
4757
4758 #if STACKSHOT_COLLECTS_LATENCY_INFO
4759 latency_info.misc_latency = mach_absolute_time();
4760 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4761
4762 struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
4763 int current_delta_snapshot_index = 0;
4764 if (num_delta_thread_snapshots > 0) {
4765 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
4766 sizeof(struct thread_delta_snapshot_v3),
4767 num_delta_thread_snapshots, &out_addr));
4768 delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
4769 }
4770
4771
4772 #if STACKSHOT_COLLECTS_LATENCY_INFO
4773 latency_info.task_thread_count_loop_latency = mach_absolute_time();
4774 #endif
4775 /*
4776 * Iterate over the task threads to save thread snapshots and determine
4777 * how much space we need for waitinfo and turnstile info
4778 */
4779 thread_t thread = THREAD_NULL;
4780 queue_iterate(&task->threads, thread, thread_t, task_threads)
4781 {
4782 if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
4783 error = KERN_FAILURE;
4784 goto error_exit;
4785 }
4786
4787 uint64_t thread_uniqueid;
4788 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4789 continue;
4790 }
4791 thread_uniqueid = thread_tid(thread);
4792
4793 boolean_t thread_on_core;
4794 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);
4795
4796 #if STACKSHOT_COLLECTS_LATENCY_INFO
4797 stackshot_cpu_latency.threads_processed++;
4798 #endif
4799
4800 switch (thread_classification) {
4801 case tc_full_snapshot:
4802 /* add thread marker */
4803 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4804 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4805
4806 /* thread snapshot can be large, including strings, avoid overflowing the stack. */
4807 kcdata_compression_window_open(stackshot_kcdata_p);
4808
4809 kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, stackshot_flags, have_pmap, thread_on_core));
4810
4811 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4812
4813 /* mark end of thread snapshot data */
4814 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
4815 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
4816 break;
4817 case tc_delta_snapshot:
4818 kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
4819 break;
4820 }
4821
4822 /*
4823 * We want to report owner information regardless of whether a thread
4824 * has changed since the last delta, whether it's a normal stackshot,
4825 * or whether it's nonrunnable
4826 */
4827 if (save_owner_info) {
4828 if (stackshot_thread_has_valid_waitinfo(thread)) {
4829 num_waitinfo_threads++;
4830 }
4831
4832 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4833 num_turnstileinfo_threads++;
4834 }
4835 }
4836 }
4837 #if STACKSHOT_COLLECTS_LATENCY_INFO
4838 latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
4839 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4840
4841 thread_waitinfo_v2_t *thread_waitinfo = NULL;
4842 thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
4843 int current_waitinfo_index = 0;
4844 int current_turnstileinfo_index = 0;
4845 /* allocate space for the wait and turnstile info */
4846 if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4847 /* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
4848 kcdata_compression_window_open(stackshot_kcdata_p);
4849
4850 if (num_waitinfo_threads > 0) {
4851 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
4852 sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
4853 thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
4854 }
4855
4856 if (num_turnstileinfo_threads > 0) {
4857 /* get space for the turnstile info */
4858 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
4859 sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
4860 thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
4861 }
4862
4863 stackshot_plh_resetgen(); // so we know which portlabel_ids are referenced
4864 }
4865
4866 #if STACKSHOT_COLLECTS_LATENCY_INFO
4867 latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
4868 latency_info.task_thread_data_loop_latency = mach_absolute_time();
4869 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4870
4871 /* Iterate over the task's threads to save the wait and turnstile info */
4872 queue_iterate(&task->threads, thread, thread_t, task_threads)
4873 {
4874 uint64_t thread_uniqueid;
4875 #pragma unused(thread_uniqueid)
4876
4877 if (active_kthreads_only_p && thread->kernel_stack == 0) {
4878 continue;
4879 }
4880
4881 thread_uniqueid = thread_tid(thread);
4882
4883 /* If we want owner info, we should capture it regardless of its classification */
4884 if (save_owner_info) {
4885 if (stackshot_thread_has_valid_waitinfo(thread)) {
4886 stackshot_thread_wait_owner_info(
4887 thread,
4888 &thread_waitinfo[current_waitinfo_index++]);
4889 }
4890
4891 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
4892 stackshot_thread_turnstileinfo(
4893 thread,
4894 &thread_turnstileinfo[current_turnstileinfo_index++]);
4895 }
4896 }
4897 }
4898
4899 #if STACKSHOT_COLLECTS_LATENCY_INFO
4900 latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
4901 latency_info.misc2_latency = mach_absolute_time();
4902 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4903
4904 #if DEBUG || DEVELOPMENT
4905 if (current_delta_snapshot_index != num_delta_thread_snapshots) {
4906 panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4907 num_delta_thread_snapshots, current_delta_snapshot_index);
4908 }
4909 if (current_waitinfo_index != num_waitinfo_threads) {
4910 panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
4911 num_waitinfo_threads, current_waitinfo_index);
4912 }
4913 #endif
4914
4915 if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
4916 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4917 // now, record the portlabel hashes.
4918 kcd_exit_on_error(kdp_stackshot_plh_record());
4919 }
4920
4921 #if IMPORTANCE_INHERITANCE
4922 if (save_donating_pids_p) {
4923 /* Ensure the buffer is big enough, since we're using the stack buffer for this. */
4924 static_assert(TASK_IMP_WALK_LIMIT * sizeof(int32_t) <= MAX_FRAMES * sizeof(uintptr_t));
4925 saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
4926 (char*) stackshot_cpu_ctx.scc_stack_buffer, TASK_IMP_WALK_LIMIT);
4927 if (saved_count > 0) {
4928 /* Variable size array - better not have it on the stack. */
4929 kcdata_compression_window_open(stackshot_kcdata_p);
4930 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
4931 sizeof(int32_t), saved_count, stackshot_cpu_ctx.scc_stack_buffer));
4932 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
4933 }
4934 }
4935 #endif
4936
4937 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
4938 if (!stackshot_ctx.sc_panic_stackshot) {
4939 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
4940 "task_cpu_cycle_count"));
4941 }
4942 #endif
4943
4944 #if STACKSHOT_COLLECTS_LATENCY_INFO
4945 latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
4946 if (collect_latency_info) {
4947 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
4948 }
4949 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
4950
4951 /* mark end of task snapshot data */
4952 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
4953 task_uniqueid));
4954 }
4955
4956
4957 error_exit:
4958 return error;
4959 }
4960
4961 /* Record global shared regions */
4962 static kern_return_t
4963 kdp_stackshot_shared_regions(uint64_t trace_flags)
4964 {
4965 kern_return_t error = KERN_SUCCESS;
4966
4967 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
4968 extern queue_head_t vm_shared_region_queue;
4969 vm_shared_region_t sr;
4970
4972 queue_iterate(&vm_shared_region_queue,
4973 sr,
4974 vm_shared_region_t,
4975 sr_q) {
4976 struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
4977 if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
4978 break;
4979 }
4980 if (collect_delta_stackshot && sr->sr_install_time < stackshot_args.since_timestamp) {
4981 continue; // only include new shared caches in delta stackshots
4982 }
4983 uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
4984 (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
4985 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
4986 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
4987 kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
4988 scinfo.sharedCacheSlide = sr->sr_slide;
4989 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
4990 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
4991 scinfo.sharedCacheID = sr->sr_id;
4992 scinfo.sharedCacheFlags = sharedCacheFlags;
4993
4994 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
4995 sizeof(scinfo), &scinfo));
4996
4997 if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
4998 _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
4999 assert(sr->sr_images_count != 0);
5000 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5001 }
5002 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5003 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
5004 }
5005
5006 /*
5007 * For backwards compatibility; this will eventually be removed.
5008 * Another copy of the Primary System Shared Region, for older readers.
5009 */
5010 sr = primary_system_shared_region;
5011 /* record system level shared cache load info (if available) */
5012 if (!collect_delta_stackshot && sr &&
5013 _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
5014 struct dyld_shared_cache_loadinfo scinfo = {0};
5015
5016 /*
5017 * Historically, this data was in a dyld_uuid_info_64 structure, but the
5018 * naming of both the structure and fields for this use isn't great. The
5019 * dyld_shared_cache_loadinfo structure has better names, but the same
5020 * layout and content as the original.
5021 *
5022 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
5023 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
5024 * entries; here, it's the slid base address, and we leave it that way
5025 * for backwards compatibility.
5026 */
5027 kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
5028 scinfo.sharedCacheSlide = sr->sr_slide;
5029 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
5030 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
5031
5032 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
5033 sizeof(scinfo), &scinfo));
5034
5035 if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
5036 /*
5037 * Include a map of the system shared cache layout if it has been populated
5038 * (which is only when the system is using a custom shared cache).
5039 */
5040 if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
5041 (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
5042 assert(sr->sr_images_count != 0);
5043 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
5044 }
5045 }
5046 }
5047
5048 error_exit:
5049 return error;
5050 }
5051
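/*
 * Top-level formatter for the stackshot. Emits the global header items
 * (flags, timebase, OS and boot-args strings, shared regions, memory and I/O
 * statistics, thread groups), then either records every task inline
 * (single-threaded stackshots) or enqueues one work item per task so other
 * CPUs can record tasks in parallel. Terminated tasks are handled the same
 * way; jetsam coalition snapshots and, on the panic path, any Exclaves
 * stackshot left in the shared buffer are appended afterwards.
 */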
5052 static kern_return_t
5053 kdp_stackshot_kcdata_format(void)
5054 {
5055 kern_return_t error = KERN_SUCCESS;
5056 mach_vm_address_t out_addr = 0;
5057 uint64_t abs_time = 0;
5058 uint64_t system_state_flags = 0;
5059 task_t task = TASK_NULL;
5060 mach_timebase_info_data_t timebase = {0, 0};
5061 uint32_t length_to_copy = 0, tmp32 = 0;
5062 abs_time = mach_absolute_time();
5063 uint64_t last_task_start_time = 0;
5064 int cur_workitem_index = 0;
5065 uint64_t tasks_in_stackshot = 0;
5066 uint64_t threads_in_stackshot = 0;
5067
5068 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5069 uint64_t stackshot_begin_cpu_cycle_count = 0;
5070
5071 if (!stackshot_ctx.sc_panic_stackshot) {
5072 stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5073 }
5074 #endif
5075
5076 /* the CPU entering here is participating in the stackshot */
5077 stackshot_cpu_ctx.scc_did_work = true;
5078
5079 #if STACKSHOT_COLLECTS_LATENCY_INFO
5080 collect_latency_info = stackshot_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
5081 #endif
5082 /* process the flags */
5083 bool collect_delta_stackshot = ((stackshot_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
5084 bool collect_exclaves = !disable_exclave_stackshot && ((stackshot_flags & STACKSHOT_SKIP_EXCLAVES) == 0);
5085 stackshot_ctx.sc_enable_faulting = (stackshot_flags & (STACKSHOT_ENABLE_BT_FAULTING));
5086
5087 /* Currently we only support returning explicit KEXT load info on fileset kernels */
5088 kc_format_t primary_kc_type = KCFormatUnknown;
5089 if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
5090 stackshot_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
5091 }
5092
5093 if (sizeof(void *) == 8) {
5094 system_state_flags |= kKernel64_p;
5095 }
5096
5097 #if CONFIG_EXCLAVES
5098 if (!stackshot_ctx.sc_panic_stackshot && collect_exclaves) {
5099 kcd_exit_on_error(stackshot_setup_exclave_waitlist()); /* Allocate list of exclave threads */
5100 }
5101 #else
5102 #pragma unused(collect_exclaves)
5103 #endif /* CONFIG_EXCLAVES */
5104
5105 /* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
5106 clock_timebase_info(&timebase);
5107
5108 /* begin saving data into the buffer */
5109 if (stackshot_ctx.sc_bytes_uncompressed) {
5110 stackshot_ctx.sc_bytes_uncompressed = 0;
5111 }
5112
5113 /*
5114 * Setup pre-task linked kcdata buffer.
5115 * The idea here is that we want the kcdata to be in (roughly) the same order as it was
5116 * before we made this multithreaded, so we have separate buffers for pre and post task-iteration,
5117 * since that's the parallelized part.
5118 */
5119 if (!stackshot_ctx.sc_is_singlethreaded) {
5120 kcd_exit_on_error(stackshot_new_linked_kcdata());
5121 stackshot_ctx.sc_pretask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5122 }
5123
5124 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, stackshot_flags, "stackshot_in_flags"));
5125 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)stackshot_args.pid, "stackshot_in_pid"));
5126 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
5127 if (stackshot_flags & STACKSHOT_PAGE_TABLES) {
5128 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_args.pagetable_mask, "stackshot_pagetable_mask"));
5129 }
5130 if (stackshot_initial_estimate != 0) {
5131 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
5132 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
5133 }
5134
5135 #if STACKSHOT_COLLECTS_LATENCY_INFO
5136 stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time();
5137 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5138
5139 #if CONFIG_JETSAM
5140 tmp32 = memorystatus_get_pressure_status_kdp();
5141 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
5142 #endif
5143
5144 if (!collect_delta_stackshot) {
5145 tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
5146 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));
5147
5148 tmp32 = PAGE_SIZE;
5149 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));
5150
5151 /* save boot-args and osversion string */
5152 length_to_copy = MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
5153 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));
5154 length_to_copy = MIN((uint32_t)(strlen(osversion) + 1), OSVERSIZE);
5155 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OS_BUILD_VERSION, length_to_copy, (void *)osversion));
5156
5157
5158 length_to_copy = MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
5159 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));
5160
5161 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
5162 } else {
5163 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stackshot_args.since_timestamp));
5164 }
5165
5166 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));
5167
5168 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_ctx.sc_microsecs));
5169
5170 kcd_exit_on_error(kdp_stackshot_shared_regions(stackshot_flags));
5171
5172 /* Add requested information first */
5173 if (stackshot_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
5174 struct mem_and_io_snapshot mais = {0};
5175 kdp_mem_and_io_snapshot(&mais);
5176 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
5177 }
5178
5179 #if CONFIG_THREAD_GROUPS
5180 struct thread_group_snapshot_v3 *thread_groups = NULL;
5181 int num_thread_groups = 0;
5182
5183 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5184 uint64_t thread_group_begin_cpu_cycle_count = 0;
5185
5186 if (!stackshot_ctx.sc_is_singlethreaded && (stackshot_flags & STACKSHOT_THREAD_GROUP)) {
5187 thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5188 }
5189 #endif
5190
5191 /* Iterate over thread group names */
5192 if (stackshot_flags & STACKSHOT_THREAD_GROUP) {
5193 /* Variable size array - better not have it on the stack. */
5194 kcdata_compression_window_open(stackshot_kcdata_p);
5195
5196 if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
5197 stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5198 }
5199
5200 if (num_thread_groups > 0) {
5201 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
5202 thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
5203 }
5204
5205 if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
5206 error = KERN_FAILURE;
5207 goto error_exit;
5208 }
5209
5210 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5211 }
5212
5213 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5214 if (!stackshot_ctx.sc_panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
5215 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
5216 "thread_groups_cpu_cycle_count"));
5217 }
5218 #endif
5219 #else
5220 stackshot_flags &= ~(STACKSHOT_THREAD_GROUP);
5221 #endif /* CONFIG_THREAD_GROUPS */
5222
5223
5224 #if STACKSHOT_COLLECTS_LATENCY_INFO
5225 stackshot_ctx.sc_latency.setup_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.setup_latency_mt;
5226 if (stackshot_ctx.sc_is_singlethreaded) {
5227 stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time();
5228 } else {
5229 stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time();
5230 }
5231 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5232
5233 bool const process_scoped = (stackshot_args.pid != -1) &&
5234 ((stackshot_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) == 0);
5235
5236 /* Iterate over tasks */
5237 queue_iterate(&tasks, task, task_t, tasks)
5238 {
5239 stackshot_panic_guard();
5240
5241 if (collect_delta_stackshot) {
5242 uint64_t abstime;
5243 proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);
5244
5245 if (abstime > last_task_start_time) {
5246 last_task_start_time = abstime;
5247 }
5248 }
5249
5250 pid_t task_pid = pid_from_task(task);
5251
5252 if (process_scoped && (task_pid != stackshot_args.pid)) {
5253 continue;
5254 }
5255
5256 if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5257 (!queue_empty(&task->threads) && task_pid != -1)) {
5258 tasks_in_stackshot++;
5259 threads_in_stackshot += task->thread_count;
5260 }
5261
5262 /* If this is a singlethreaded stackshot, don't use the work queues. */
5263 if (stackshot_ctx.sc_is_singlethreaded) {
5264 kcd_exit_on_error(kdp_stackshot_record_task(task));
5265 } else {
5266 kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5267 .sswi_task = task,
5268 .sswi_data = NULL,
5269 .sswi_idx = cur_workitem_index++
5270 }));
5271 }
5272
5273 if (process_scoped) {
5274 /* Only targeting one process, we're done now. */
5275 break;
5276 }
5277 }
5278
5279 #if STACKSHOT_COLLECTS_LATENCY_INFO
5280 if (stackshot_ctx.sc_is_singlethreaded) {
5281 stackshot_ctx.sc_latency.total_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_task_iteration_latency_mt;
5282 } else {
5283 stackshot_ctx.sc_latency.task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.task_queue_building_latency_mt;
5284 }
5285 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5286
5287 /* Setup post-task kcdata buffer */
5288 if (!stackshot_ctx.sc_is_singlethreaded) {
5289 stackshot_finalize_linked_kcdata();
5290 kcd_exit_on_error(stackshot_new_linked_kcdata());
5291 stackshot_ctx.sc_posttask_kcdata = stackshot_cpu_ctx.scc_kcdata_head;
5292 }
5293
5294 #if CONFIG_COALITIONS
5295 /* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
5296 if (!collect_delta_stackshot || (last_task_start_time > stackshot_args.since_timestamp)) {
5297 int num_coalitions = 0;
5298 struct jetsam_coalition_snapshot *coalitions = NULL;
5299
5300 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5301 uint64_t coalition_begin_cpu_cycle_count = 0;
5302
5303 if (!stackshot_ctx.sc_panic_stackshot && (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
5304 coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
5305 }
5306 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5307
5308 /* Iterate over coalitions */
5309 if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5310 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5311 stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5312 }
5313 }
5314 if (stackshot_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
5315 if (num_coalitions > 0) {
5316 /* Variable size array - better not have it on the stack. */
5317 kcdata_compression_window_open(stackshot_kcdata_p);
5318 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
5319 coalitions = (struct jetsam_coalition_snapshot*)out_addr;
5320
5321 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
5322 error = KERN_FAILURE;
5323 goto error_exit;
5324 }
5325
5326 kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
5327 }
5328 }
5329 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5330 if (!stackshot_ctx.sc_panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
5331 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
5332 "coalitions_cpu_cycle_count"));
5333 }
5334 #endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
5335 }
5336 #else
5337 stackshot_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
5338 #endif /* CONFIG_COALITIONS */
5339
5340 stackshot_panic_guard();
5341
5342 #if STACKSHOT_COLLECTS_LATENCY_INFO
5343 if (stackshot_ctx.sc_is_singlethreaded) {
5344 stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time();
5345 } else {
5346 stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time();
5347 }
5348 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5349
5350 /*
5351 * Iterate over the tasks in the terminated tasks list. We only inspect
5352 * tasks that have a valid bsd_info pointer. Transition checks (such as a
5353 * task past P_LPEXIT during proc_exit()) now happen inside
5354 * kdp_stackshot_record_task(), which collects a safer, minimal
5355 * transitioning_task_snapshot struct via
5356 * kcdata_record_transitioning_task_snapshot().
5357 */
5358 queue_iterate(&terminated_tasks, task, task_t, tasks)
5359 {
5360 stackshot_panic_guard();
5361
5362 if ((task->active && !task_is_a_corpse(task) && !task_is_a_corpse_fork(task)) ||
5363 (!queue_empty(&task->threads) && pid_from_task(task) != -1)) {
5364 tasks_in_stackshot++;
5365 threads_in_stackshot += task->thread_count;
5366 }
5367
5368 /* Only use workqueues on non-panic and non-scoped stackshots. */
5369 if (stackshot_ctx.sc_is_singlethreaded) {
5370 kcd_exit_on_error(kdp_stackshot_record_task(task));
5371 } else {
5372 kcd_exit_on_error(stackshot_put_workitem((struct stackshot_workitem) {
5373 .sswi_task = task,
5374 .sswi_data = NULL,
5375 .sswi_idx = cur_workitem_index++
5376 }));
5377 }
5378 }
5379
5380 /* Mark the queue(s) as populated. */
5381 for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
5382 os_atomic_store(&stackshot_ctx.sc_workqueues[i].sswq_populated, true, release);
5383 }
5384
5385 #if DEVELOPMENT || DEBUG
5386 kcd_exit_on_error(kdp_stackshot_plh_stats());
5387 #endif /* DEVELOPMENT || DEBUG */
5388
5389 #if STACKSHOT_COLLECTS_LATENCY_INFO
5390 if (stackshot_ctx.sc_is_singlethreaded) {
5391 stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.total_terminated_task_iteration_latency_mt;
5392 } else {
5393 stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.terminated_task_queue_building_latency_mt;
5394 }
5395 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5396
5397 #if STACKSHOT_COLLECTS_LATENCY_INFO
5398 if (collect_latency_info) {
5399 stackshot_ctx.sc_latency.latency_version = 2;
5400 stackshot_ctx.sc_latency.main_cpu_number = stackshot_ctx.sc_main_cpuid;
5401 stackshot_ctx.sc_latency.calling_cpu_number = stackshot_ctx.sc_calling_cpuid;
5402 }
5403 #endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
5404
5405 #if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
5406 if (!stackshot_ctx.sc_panic_stackshot) {
5407 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
5408 "stackshot_total_cpu_cycle_cnt"));
5409 }
5410 #endif
5411
5412 kcdata_add_uint64_with_description(stackshot_kcdata_p, tasks_in_stackshot, "stackshot_tasks_count");
5413 kcdata_add_uint64_with_description(stackshot_kcdata_p, threads_in_stackshot, "stackshot_threads_count");
5414
5415 stackshot_panic_guard();
5416
5417 if (!stackshot_ctx.sc_is_singlethreaded) {
5418 /* Chip away at the queue. */
5419 stackshot_finalize_linked_kcdata();
5420 stackshot_cpu_do_work();
5421 *stackshot_kcdata_p = stackshot_cpu_ctx.scc_kcdata_tail->kcdata;
5422 }
5423
5424 #if CONFIG_EXCLAVES
5425 /* If this is the panic stackshot, check if Exclaves panic left its stackshot in the shared region */
5426 if (stackshot_ctx.sc_panic_stackshot) {
5427 struct exclaves_panic_stackshot excl_ss;
5428 kdp_read_panic_exclaves_stackshot(&excl_ss);
5429
5430 if (excl_ss.stackshot_buffer != NULL && excl_ss.stackshot_buffer_size != 0) {
5431 tb_error_t tberr = TB_ERROR_SUCCESS;
5432 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_FOUND;
5433
5434 /* the block below does not escape, so capturing the address of the local 'error' is safe */
5435 kern_return_t *error_in_block = &error;
5436 kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
5437 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5438 tberr = stackshot_stackshotresult__unmarshal(excl_ss.stackshot_buffer, excl_ss.stackshot_buffer_size, ^(stackshot_stackshotresult_s result){
5439 *error_in_block = stackshot_exclaves_process_stackshot(&result, stackshot_kcdata_p, false);
5440 });
5441 kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
5442 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
5443 if (tberr != TB_ERROR_SUCCESS) {
5444 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_DECODE_FAILED;
5445 }
5446 } else {
5447 exclaves_panic_ss_status = EXCLAVES_PANIC_STACKSHOT_NOT_FOUND;
5448 }
5449
5450 /* check error from the block */
5451 kcd_exit_on_error(error);
5452 }
5453 #endif
5454
5455 /* === END of populating stackshot data === */
5456 error_exit:;
5457 if (error != KERN_SUCCESS) {
5458 stackshot_set_error(error);
5459 }
5460
5461 stackshot_panic_guard();
5462
5463 return error;
5464 }
5465
5466 static uint64_t
5467 proc_was_throttled_from_task(task_t task)
5468 {
5469 uint64_t was_throttled = 0;
5470 void *bsd_info = get_bsdtask_info(task);
5471
5472 if (bsd_info) {
5473 was_throttled = proc_was_throttled(bsd_info);
5474 }
5475
5476 return was_throttled;
5477 }
5478
5479 static uint64_t
5480 proc_did_throttle_from_task(task_t task)
5481 {
5482 uint64_t did_throttle = 0;
5483 void *bsd_info = get_bsdtask_info(task);
5484
5485 if (bsd_info) {
5486 did_throttle = proc_did_throttle(bsd_info);
5487 }
5488
5489 return did_throttle;
5490 }
5491
5492 static void
5493 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
5494 {
5495 unsigned int pages_reclaimed;
5496 unsigned int pages_wanted;
5497 kern_return_t kErr;
5498
5499 uint64_t compressions = 0;
5500 uint64_t decompressions = 0;
5501
5502 compressions = counter_load(&vm_statistics_compressions);
5503 decompressions = counter_load(&vm_statistics_decompressions);
5504
5505 memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
5506 memio_snap->free_pages = vm_page_free_count;
5507 memio_snap->active_pages = vm_page_active_count;
5508 memio_snap->inactive_pages = vm_page_inactive_count;
5509 memio_snap->purgeable_pages = vm_page_purgeable_count;
5510 memio_snap->wired_pages = vm_page_wire_count;
5511 memio_snap->speculative_pages = vm_page_speculative_count;
5512 memio_snap->throttled_pages = vm_page_throttled_count;
5513 memio_snap->busy_buffer_count = count_busy_buffers();
5514 memio_snap->filebacked_pages = vm_page_pageable_external_count;
5515 memio_snap->compressions = (uint32_t)compressions;
5516 memio_snap->decompressions = (uint32_t)decompressions;
5517 memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
5518 kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
5519
5520 if (!kErr) {
5521 memio_snap->pages_wanted = (uint32_t)pages_wanted;
5522 memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
5523 memio_snap->pages_wanted_reclaimed_valid = 1;
5524 } else {
5525 memio_snap->pages_wanted = 0;
5526 memio_snap->pages_reclaimed = 0;
5527 memio_snap->pages_wanted_reclaimed_valid = 0;
5528 }
5529 }
5530
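/*
 * Translate a task virtual address to a physical address via kdp_find_phys,
 * tracking fault behavior along the way: per-CPU counts of pages faulted in
 * and time spent faulting feed the stackshot's fault statistics, and
 * faulting is disabled for the rest of this CPU's work once the cumulative
 * fault time exceeds stackshot_max_fault_time (the cutoff is not applied to
 * panic stackshots).
 */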
5531 static vm_offset_t
5532 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
5533 {
5534 vm_offset_t result;
5535 struct kdp_fault_result fault_results = {0};
5536 if (stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting) {
5537 fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
5538 }
5539 if (!stackshot_ctx.sc_panic_stackshot) {
5540 fault_flags |= KDP_FAULT_FLAGS_MULTICPU;
5541 }
5542
5543 result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
5544
5545 if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
5546 stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
5547
5548 #if STACKSHOT_COLLECTS_LATENCY_INFO
5549 stackshot_cpu_latency.faulting_time_mt += fault_results.time_spent_faulting;
5550 #endif
5551
5552 if ((stackshot_cpu_ctx.scc_fault_stats.sfs_time_spent_faulting >= stackshot_max_fault_time) && !stackshot_ctx.sc_panic_stackshot) {
5553 stackshot_cpu_ctx.scc_fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
5554 }
5555 }
5556
5557 if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
5558 stackshot_cpu_ctx.scc_fault_stats.sfs_pages_faulted_in++;
5559 }
5560
5561 if (kdp_fault_result_flags) {
5562 *kdp_fault_result_flags = fault_results.flags;
5563 }
5564
5565 return result;
5566 }
5567
5568 /*
5569 * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
5570 * in order to:
5571 * 1. collect statistics on the number of pages faulted in
5572 * 2. stop faulting if the time spent faulting has exceeded the limit.
5573 */
5574 static boolean_t
5575 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5576 {
5577 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5578 if (try_fault) {
5579 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5580 }
5581 return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5582 }
5583 static boolean_t
stackshot_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5584 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5585 {
5586 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5587 if (try_fault) {
5588 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5589 }
5590 return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
5591 }
5592 static int
stackshot_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)5593 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
5594 {
5595 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
5596 if (try_fault) {
5597 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
5598 }
5599 return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
5600 }
5601
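/*
 * Top-level stackshot work, run on the CPU driving the stackshot. Sets up the
 * per-difficulty work queues (unless singlethreaded), elects a main CPU if
 * preflight did not find a recommended P-core, participates in the work, then
 * waits for every other participating CPU to finish and records the duration.
 */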
kern_return_t
do_stackshot(void *context)
{
#pragma unused(context)
	kern_return_t error;
	size_t queue_size;
	uint64_t abs_time = mach_absolute_time(), abs_time_end = 0;
	kdp_snapshot++;

	_stackshot_validation_reset();
	error = stackshot_plh_setup(); /* set up port label hash */

	if (!stackshot_ctx.sc_is_singlethreaded) {
		/* Set up queues. These numbers shouldn't change, but slightly fudge queue size just in case. */
		queue_size = FUDGED_SIZE(tasks_count + terminated_tasks_count, 10);
		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
			stackshot_ctx.sc_workqueues[i] = (struct stackshot_workqueue) {
				.sswq_items = stackshot_alloc_arr(struct stackshot_workitem, queue_size, &error),
				.sswq_capacity = queue_size,
				.sswq_num_items = 0,
				.sswq_cur_item = 0,
				.sswq_populated = false
			};
			if (error != KERN_SUCCESS) {
				break;
			}
		}
	}

	if (error != KERN_SUCCESS) {
		stackshot_set_error(error);
		return error;
	}

	/*
	 * Every CPU has called stackshot_cpu_preflight by now, so if no main CPU
	 * has been selected yet, there was no CLPC-recommended P-core available.
	 * In that case, volunteer this CPU to be the main CPU, because someone
	 * has to do it.
	 */
	if (stackshot_ctx.sc_main_cpuid == -1) {
		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, cpu_number(), acquire);
		stackshot_cpu_ctx.scc_can_work = true;
	}

	/* After this, auxiliary CPUs can begin work. */
	os_atomic_store(&stackshot_ctx.sc_state, SS_RUNNING, release);

	/* If we are the main CPU, populate the queues / do other main CPU work. */
	if (stackshot_ctx.sc_panic_stackshot || (stackshot_ctx.sc_main_cpuid == cpu_number())) {
		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
	} else if (stackshot_cpu_ctx.scc_can_work) {
		stackshot_cpu_do_work();
	}

	/* Wait for every CPU to finish. */
#if STACKSHOT_COLLECTS_LATENCY_INFO
	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time();
#endif
	if (stackshot_cpu_ctx.scc_can_work) {
		os_atomic_dec(&stackshot_ctx.sc_cpus_working, seq_cst);
		stackshot_cpu_ctx.scc_can_work = false;
	}
	while (os_atomic_load(&stackshot_ctx.sc_cpus_working, seq_cst) != 0) {
		loop_wait();
	}
	stackshot_panic_guard();
#if STACKSHOT_COLLECTS_LATENCY_INFO
	stackshot_ctx.sc_latency.cpu_wait_latency_mt = mach_absolute_time() - stackshot_ctx.sc_latency.cpu_wait_latency_mt;
#endif

	/* update timestamp of the stackshot */
	abs_time_end = mach_absolute_time();
	stackshot_ctx.sc_duration = (struct stackshot_duration_v2) {
		.stackshot_duration = (abs_time_end - abs_time),
		.stackshot_duration_outer = 0,
		.stackshot_duration_prior = stackshot_duration_prior_abs,
	};

	stackshot_plh_reset();

	/* Check interrupts disabled time. */
#if SCHED_HYGIENE_DEBUG
	bool disable_interrupts_masked_check = kern_feature_override(
		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
	    (stackshot_flags & STACKSHOT_DO_COMPRESS) != 0;

#if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
	disable_interrupts_masked_check = true;
#endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */

	if (disable_interrupts_masked_check) {
		ml_spin_debug_clear_self();
	}

	if (!stackshot_ctx.sc_panic_stackshot && interrupt_masked_debug_mode) {
		/*
		 * Try to catch instances where stackshot takes too long BEFORE returning from
		 * the debugger
		 */
		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
	}
#endif /* SCHED_HYGIENE_DEBUG */

	kdp_snapshot--;

	/* If any other CPU had an error, make sure we return it */
	if (stackshot_ctx.sc_retval == KERN_SUCCESS) {
		stackshot_ctx.sc_retval = stackshot_status_check();
	}

#if CONFIG_EXCLAVES
	/* Avoid setting AST until as late as possible, in case the stackshot fails */
	if (!stackshot_ctx.sc_panic_stackshot && stackshot_ctx.sc_retval == KERN_SUCCESS) {
		commit_exclaves_ast();
	}
	if (stackshot_ctx.sc_retval != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
		/* Clear inspection CTID list: no need to wait for these threads */
		stackshot_exclave_inspect_ctid_count = 0;
		stackshot_exclave_inspect_ctid_capacity = 0;
		stackshot_exclave_inspect_ctids = NULL;
	}
#endif

	/* If this is a singlethreaded stackshot, the "final" kcdata buffer is just our CPU's kcdata buffer */
	if (stackshot_ctx.sc_is_singlethreaded) {
		stackshot_ctx.sc_finalized_kcdata = stackshot_kcdata_p;
	}

	return stackshot_ctx.sc_retval;
}

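/*
 * Panic-time variant: runs the stackshot and, on success, finalizes the
 * singlethreaded kcdata buffer.
 */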
kern_return_t
do_panic_stackshot(void *context)
{
	kern_return_t ret = do_stackshot(context);
	if (ret != KERN_SUCCESS) {
		goto out;
	}

	ret = stackshot_finalize_singlethreaded_kcdata();

out:
	return ret;
}

/*
 * Set up needed state for this CPU before participating in a stackshot.
 * Namely, we want to signal that we're available to do work.
 * Called while interrupts are disabled & in the debugger trap.
 */
void
stackshot_cpu_preflight(void)
{
	bool is_recommended, is_calling_cpu;
	int my_cpu_no = cpu_number();

#if STACKSHOT_COLLECTS_LATENCY_INFO
	stackshot_cpu_latency = (typeof(stackshot_cpu_latency)) {
		.cpu_number = cpu_number(),
#if defined(__AMP__)
		.cluster_type = current_cpu_datap()->cpu_cluster_type,
#else /* __AMP__ */
		.cluster_type = CLUSTER_TYPE_SMP,
#endif /* __AMP__ */
		.faulting_time_mt = 0,
		.total_buf = 0,
		.intercluster_buf_used = 0
	};
#if CONFIG_PERVASIVE_CPI
	mt_cur_cpu_cycles_instrs_speculative(&stackshot_cpu_latency.total_cycles, &stackshot_cpu_latency.total_instrs);
#endif /* CONFIG_PERVASIVE_CPI */
	stackshot_cpu_latency.init_latency_mt = stackshot_cpu_latency.total_latency_mt = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	is_recommended = current_processor()->is_recommended;

	/* If this is a recommended P-core (or SMP), try making it the main CPU */
	if (is_recommended
#if defined(__AMP__)
	    && current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_P
#endif /* __AMP__ */
	    ) {
		os_atomic_cmpxchg(&stackshot_ctx.sc_main_cpuid, -1, my_cpu_no, acquire);
	}

	is_calling_cpu = stackshot_ctx.sc_calling_cpuid == my_cpu_no;

	stackshot_cpu_ctx.scc_did_work = false;
	stackshot_cpu_ctx.scc_can_work = is_calling_cpu || (is_recommended && !stackshot_ctx.sc_is_singlethreaded);

	if (stackshot_cpu_ctx.scc_can_work) {
		os_atomic_inc(&stackshot_ctx.sc_cpus_working, relaxed);
	}
}

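/*
 * Drain work items from a single queue, recording one task per item into a
 * freshly allocated linked kcdata buffer. Spins (via loop_wait) while the
 * queue is still being populated, and bails out if another CPU has reported
 * an error or a panic has begun.
 */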
__result_use_check
static kern_return_t
stackshot_cpu_work_on_queue(struct stackshot_workqueue *queue)
{
	struct stackshot_workitem *cur_workitemp;
	kern_return_t error = KERN_SUCCESS;

	while (((cur_workitemp = stackshot_get_workitem(queue)) != NULL || !os_atomic_load(&queue->sswq_populated, acquire))) {
		/* Check to make sure someone hasn't errored out or panicked. */
		if (__improbable(stackshot_status_check() != KERN_SUCCESS)) {
			return KERN_ABORTED;
		}

		if (cur_workitemp) {
			kcd_exit_on_error(stackshot_new_linked_kcdata());
			cur_workitemp->sswi_data = stackshot_cpu_ctx.scc_kcdata_head;
			kcd_exit_on_error(kdp_stackshot_record_task(cur_workitemp->sswi_task));
			stackshot_finalize_linked_kcdata();
		} else {
#if STACKSHOT_COLLECTS_LATENCY_INFO
			uint64_t time_begin = mach_absolute_time();
#endif
			loop_wait();
#if STACKSHOT_COLLECTS_LATENCY_INFO
			stackshot_cpu_latency.workqueue_latency_mt += mach_absolute_time() - time_begin;
#endif
		}
	}

error_exit:
	return error;
}

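/*
 * Per-CPU worker loop: allocates this CPU's stack-trace buffer and then walks
 * the work queues. Non-E cores start with the most difficult queue and E
 * cores with the least difficult, so the two kinds of cores work toward each
 * other from opposite ends.
 */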
static void
stackshot_cpu_do_work(void)
{
	kern_return_t error;

	stackshot_cpu_ctx.scc_stack_buffer = stackshot_alloc_arr(uintptr_t, MAX_FRAMES, &error);
	if (error != KERN_SUCCESS) {
		goto error_exit;
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	stackshot_cpu_latency.init_latency_mt = mach_absolute_time() - stackshot_cpu_latency.init_latency_mt;
#endif

	bool high_perf = true;

#if defined(__AMP__)
	if (current_cpu_datap()->cpu_cluster_type == CLUSTER_TYPE_E) {
		high_perf = false;
	}
#endif /* __AMP__ */

	if (high_perf) {
		/* Non-E cores: Work from most difficult to least difficult */
		for (size_t i = STACKSHOT_NUM_WORKQUEUES; i > 0; i--) {
			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i - 1]));
		}
	} else {
		/* E: Work from least difficult to most difficult */
		for (size_t i = 0; i < STACKSHOT_NUM_WORKQUEUES; i++) {
			kcd_exit_on_error(stackshot_cpu_work_on_queue(&stackshot_ctx.sc_workqueues[i]));
		}
	}
#if STACKSHOT_COLLECTS_LATENCY_INFO
	stackshot_cpu_latency.total_latency_mt = mach_absolute_time() - stackshot_cpu_latency.total_latency_mt;
#if CONFIG_PERVASIVE_CPI
	uint64_t cycles, instrs;
	mt_cur_cpu_cycles_instrs_speculative(&cycles, &instrs);
	stackshot_cpu_latency.total_cycles = cycles - stackshot_cpu_latency.total_cycles;
	stackshot_cpu_latency.total_instrs = instrs - stackshot_cpu_latency.total_instrs;
#endif /* CONFIG_PERVASIVE_CPI */
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

error_exit:
	if (error != KERN_SUCCESS) {
		stackshot_set_error(error);
	}
	stackshot_panic_guard();
}

/*
 * This is where the other CPUs will end up when we take a stackshot.
 * If they're available to do work, they'll do so here.
 * Called with interrupts disabled & from the debugger trap.
 */
void
stackshot_aux_cpu_entry(void)
{
	/*
	 * This is where the other CPUs will end up when we take a stackshot.
	 * Also, the main CPU will call this in the middle of its work to chip
	 * away at the queue.
	 */

	/* Don't do work if we said we couldn't... */
	if (!stackshot_cpu_ctx.scc_can_work) {
		return;
	}

	/* Spin until we're ready to run. */
	while (os_atomic_load(&stackshot_ctx.sc_state, acquire) == SS_SETUP) {
		loop_wait();
	}

	/* Check to make sure the setup didn't error out or panic. */
	if (stackshot_status_check() != KERN_SUCCESS) {
		goto exit;
	}

	/* the CPU entering here is participating in the stackshot */
	stackshot_cpu_ctx.scc_did_work = true;

	if (stackshot_ctx.sc_main_cpuid == cpu_number()) {
		stackshot_ctx.sc_retval = kdp_stackshot_kcdata_format();
	} else {
		stackshot_cpu_do_work();
	}

exit:
	os_atomic_dec(&stackshot_ctx.sc_cpus_working, release);
}

boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)
{
	/* When the pthread kext puts a worker thread to sleep, it will
	 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
	 * struct. See parkit() in kern/kern_support.c in libpthread.
	 */
	return (thread->state & TH_WAIT) &&
	       (thread->block_hint == kThreadWaitParkedWorkQueue);
}

#if CONFIG_COALITIONS
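/* Per-coalition callback used to count coalitions for the jetsam coalition snapshot. */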
static void
stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
{
#pragma unused(i, coal)
	unsigned int *coalition_count = (unsigned int*)arg;
	(*coalition_count)++;
}

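/*
 * Per-coalition callback used to fill in one jetsam_coalition_snapshot entry:
 * coalition id, lifecycle flags, thread group id, and the unique id of the
 * leader task (0 if there is none).
 */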
static void
stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
{
	if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
		return;
	}

	struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
	struct jetsam_coalition_snapshot *jcs = &coalitions[i];
	task_t leader = TASK_NULL;
	jcs->jcs_id = coalition_id(coal);
	jcs->jcs_flags = 0;
	jcs->jcs_thread_group = 0;

	if (coalition_term_requested(coal)) {
		jcs->jcs_flags |= kCoalitionTermRequested;
	}
	if (coalition_is_terminated(coal)) {
		jcs->jcs_flags |= kCoalitionTerminated;
	}
	if (coalition_is_reaped(coal)) {
		jcs->jcs_flags |= kCoalitionReaped;
	}
	if (coalition_is_privileged(coal)) {
		jcs->jcs_flags |= kCoalitionPrivileged;
	}

#if CONFIG_THREAD_GROUPS
	struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
	if (thread_group) {
		jcs->jcs_thread_group = thread_group_get_id(thread_group);
	}
#endif /* CONFIG_THREAD_GROUPS */

	leader = kdp_coalition_get_leader(coal);
	if (leader) {
		jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
	} else {
		jcs->jcs_leader_task_uniqueid = 0;
	}
}
#endif /* CONFIG_COALITIONS */

#if CONFIG_THREAD_GROUPS
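/* Per-thread-group callback used to count thread groups for the snapshot. */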
static void
stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
{
#pragma unused(i, tg)
	unsigned int *n = (unsigned int*)arg;
	(*n)++;
}

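/*
 * Per-thread-group callback used to fill in one thread_group_snapshot_v3
 * entry: id, name (split across tgs_name and tgs_name_cont), and a
 * translation of the thread group flags into snapshot flag bits.
 */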
static void
stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
{
	struct thread_group_snapshot_v3 *thread_groups = arg;
	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
	const char *name = thread_group_get_name(tg);
	uint32_t flags = thread_group_get_flags(tg);
	tgs->tgs_id = thread_group_get_id(tg);
	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
	    sizeof(tgs->tgs_name_cont));
	tgs->tgs_flags =
	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
	    ((flags & THREAD_GROUP_FLAGS_APPLICATION) ? kThreadGroupApplication : 0) |
	    ((flags & THREAD_GROUP_FLAGS_CRITICAL) ? kThreadGroupCritical : 0) |
	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT) ? kThreadGroupBestEffort : 0) |
	    ((flags & THREAD_GROUP_FLAGS_UI_APP) ? kThreadGroupUIApplication : 0) |
	    ((flags & THREAD_GROUP_FLAGS_MANAGED) ? kThreadGroupManaged : 0) |
	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers : 0) |
	    0;
}
#endif /* CONFIG_THREAD_GROUPS */

/* Determine if a thread has waitinfo that stackshot can provide */
static int
stackshot_thread_has_valid_waitinfo(thread_t thread)
{
	if (!(thread->state & TH_WAIT)) {
		return 0;
	}

	switch (thread->block_hint) {
	// If set to None or is a parked work queue, ignore it
	case kThreadWaitParkedWorkQueue:
	case kThreadWaitNone:
		return 0;
	// There is a short window where the pthread kext removes a thread
	// from its ksyn wait queue before waking the thread up
	case kThreadWaitPThreadMutex:
	case kThreadWaitPThreadRWLockRead:
	case kThreadWaitPThreadRWLockWrite:
	case kThreadWaitPThreadCondVar:
		return kdp_pthread_get_thread_kwq(thread) != NULL;
	// All other cases are valid block hints if in a wait state
	default:
		return 1;
	}
}

/* Determine if a thread has turnstileinfo that stackshot can provide */
static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)
{
	struct turnstile *ts = thread_get_waiting_turnstile(thread);

	return stackshot_thread_has_valid_waitinfo(thread) &&
	       ts != TURNSTILE_NULL;
}

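/*
 * Fill in turnstile info for a waiting thread: the waiter's tid, the
 * turnstile-derived fields from kdp_turnstile_fill_tsinfo, and a port label
 * id resolved through the stackshot port label hash.
 */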
static void
stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
{
	struct turnstile *ts;
	struct ipc_service_port_label *ispl = NULL;

	/* acquire turnstile information and store it in the stackshot */
	ts = thread_get_waiting_turnstile(thread);
	tsinfo->waiter = thread_tid(thread);
	kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
	tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
	    (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
	    STACKSHOT_PLH_LOOKUP_UNKNOWN);
}

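/*
 * Fill in waitinfo for a waiting thread based on its block hint, dispatching
 * to the appropriate kdp_*_find_owner helper to identify the owner of the
 * object the thread is blocked on.
 */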
static void
stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
{
	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
	struct ipc_service_port_label *ispl = NULL;

	waitinfo->waiter = thread_tid(thread);
	waitinfo->wait_type = thread->block_hint;
	waitinfo->wait_flags = 0;

	switch (waitinfo->wait_type) {
	case kThreadWaitKernelMutex:
		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitPortReceive:
		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
		waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
		break;
	case kThreadWaitPortSend:
		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
		waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
		break;
	case kThreadWaitSemaphore:
		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitUserLock:
		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitKernelRWLockRead:
	case kThreadWaitKernelRWLockWrite:
	case kThreadWaitKernelRWLockUpgrade:
		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitPThreadMutex:
	case kThreadWaitPThreadRWLockRead:
	case kThreadWaitPThreadRWLockWrite:
	case kThreadWaitPThreadCondVar:
		kdp_pthread_find_owner(thread, waitinfo_v1);
		break;
	case kThreadWaitWorkloopSyncWait:
		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitOnProcess:
		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitSleepWithInheritor:
		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitEventlink:
		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitCompressor:
		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
		break;
#ifdef CONFIG_EXCLAVES
	case kThreadWaitExclaveCore:
	case kThreadWaitExclaveKit:
		kdp_esync_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
#endif /* CONFIG_EXCLAVES */
	case kThreadWaitPageBusy:
		kdp_vm_page_sleep_find_owner(thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitPagingInProgress:
	case kThreadWaitPagingActivity:
	case kThreadWaitPagerInit:
	case kThreadWaitPagerReady:
	case kThreadWaitMemoryBlocked:
	case kThreadWaitPageInThrottle:
		kdp_vm_object_sleep_find_owner(thread->wait_event, waitinfo->wait_type, waitinfo_v1);
		break;
	default:
		waitinfo->owner = 0;
		waitinfo->context = 0;
		break;
	}
}
