1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/vm_param.h>
31 #include <mach/mach_vm.h>
32 #include <mach/clock_types.h>
33 #include <sys/code_signing.h>
34 #include <sys/errno.h>
35 #include <sys/stackshot.h>
36 #ifdef IMPORTANCE_INHERITANCE
37 #include <ipc/ipc_importance.h>
38 #endif
39 #include <sys/appleapiopts.h>
40 #include <kern/debug.h>
41 #include <kern/block_hint.h>
42 #include <uuid/uuid.h>
43
44 #include <kdp/kdp_dyld.h>
45 #include <kdp/kdp_en_debugger.h>
46 #include <kdp/processor_core.h>
47 #include <kdp/kdp_common.h>
48
49 #include <libsa/types.h>
50 #include <libkern/version.h>
51 #include <libkern/section_keywords.h>
52
53 #include <string.h> /* bcopy */
54
55 #include <kern/backtrace.h>
56 #include <kern/coalition.h>
57 #include <kern/exclaves_stackshot.h>
58 #include <kern/exclaves_inspection.h>
59 #include <kern/processor.h>
60 #include <kern/host_statistics.h>
61 #include <kern/counter.h>
62 #include <kern/thread.h>
63 #include <kern/thread_group.h>
64 #include <kern/task.h>
65 #include <kern/telemetry.h>
66 #include <kern/clock.h>
67 #include <kern/policy_internal.h>
68 #include <kern/socd_client.h>
69 #include <vm/vm_map.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_pageout.h>
72 #include <vm/vm_fault.h>
73 #include <vm/vm_shared_region.h>
74 #include <vm/vm_compressor.h>
75 #include <libkern/OSKextLibPrivate.h>
76 #include <os/log.h>
77
78 #ifdef CONFIG_EXCLAVES
79 #include <kern/exclaves.tightbeam.h>
80 #endif /* CONFIG_EXCLAVES */
81
82 #include <kern/exclaves_test_stackshot.h>
83
84 #if defined(__x86_64__)
85 #include <i386/mp.h>
86 #include <i386/cpu_threads.h>
87 #endif
88
89 #include <pexpert/pexpert.h>
90
91 #if CONFIG_PERVASIVE_CPI
92 #include <kern/monotonic.h>
93 #endif /* CONFIG_PERVASIVE_CPI */
94
95 #include <san/kasan.h>
96
97 #if DEBUG || DEVELOPMENT
98 # define STACKSHOT_COLLECTS_LATENCY_INFO 1
99 #else
100 # define STACKSHOT_COLLECTS_LATENCY_INFO 0
101 #endif /* DEBUG || DEVELOPMENT */
102
103 extern unsigned int not_in_kdp;
104
105 /* indicate to the compiler that some accesses are unaligned */
106 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
107
108 int kdp_snapshot = 0;
109 static kern_return_t stack_snapshot_ret = 0;
110 static uint32_t stack_snapshot_bytes_traced = 0;
111 static uint32_t stack_snapshot_bytes_uncompressed = 0;
112
113 #if STACKSHOT_COLLECTS_LATENCY_INFO
114 static bool collect_latency_info = true;
115 #endif
116 static kcdata_descriptor_t stackshot_kcdata_p = NULL;
117 static void *stack_snapshot_buf;
118 static uint32_t stack_snapshot_bufsize;
119 int stack_snapshot_pid;
120 static uint64_t stack_snapshot_flags;
121 static uint64_t stackshot_out_flags;
122 static uint64_t stack_snapshot_delta_since_timestamp;
123 static uint32_t stack_snapshot_pagetable_mask;
124 static boolean_t panic_stackshot;
125
126 static boolean_t stack_enable_faulting = FALSE;
127 static struct stackshot_fault_stats fault_stats;
128
129 static uint64_t stackshot_last_abs_start; /* start time of last stackshot */
130 static uint64_t stackshot_last_abs_end; /* end time of last stackshot */
131 static uint64_t stackshots_taken; /* total stackshots taken since boot */
132 static uint64_t stackshots_duration; /* total abs time spent in stackshot_trap() since boot */
133
134 /*
135 * Experimentally, our current estimates are 40% short 77% of the time; adding
136 * 75% to the estimate gets us into 99%+ territory. In the longer run, we need
137 * to make stackshot estimates use a better approach (rdar://78880038); this is
138 * intended to be a short-term fix.
139 */
140 uint32_t stackshot_estimate_adj = 75; /* experiment factor: 0-100, adjust our estimate up by this amount */
141
142 static uint32_t stackshot_initial_estimate;
143 static uint32_t stackshot_initial_estimate_adj;
144 static uint64_t stackshot_duration_prior_abs; /* prior attempts, abs */
145 static unaligned_u64 * stackshot_duration_outer;
146 static uint64_t stackshot_microsecs;
147
148 void * kernel_stackshot_buf = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
149 int kernel_stackshot_buf_size = 0;
150
151 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
152
153 #if CONFIG_EXCLAVES
154 static ctid_t *stackshot_exclave_inspect_ctids = NULL;
155 static size_t stackshot_exclave_inspect_ctid_count = 0;
156 static size_t stackshot_exclave_inspect_ctid_capacity = 0;
157
158 static kern_return_t stackshot_exclave_kr = KERN_SUCCESS;
159 #endif /* CONFIG_EXCLAVES */
160
161 __private_extern__ void stackshot_init( void );
162 static boolean_t memory_iszero(void *addr, size_t size);
163 uint32_t get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj);
164 kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
165 size_t stackshot_config_size, boolean_t stackshot_from_user);
166 kern_return_t do_stackshot(void *);
167 void kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags, kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask);
168 boolean_t stackshot_thread_is_idle_worker_unsafe(thread_t thread);
169 static int kdp_stackshot_kcdata_format(int pid, uint64_t * trace_flags);
170 uint32_t kdp_stack_snapshot_bytes_traced(void);
171 uint32_t kdp_stack_snapshot_bytes_uncompressed(void);
172 static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
173 static vm_offset_t stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags);
174 static boolean_t stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
175 static int stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
176 static boolean_t stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
177 static uint64_t proc_was_throttled_from_task(task_t task);
178 static void stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t * waitinfo);
179 static int stackshot_thread_has_valid_waitinfo(thread_t thread);
180 static void stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo);
181 static int stackshot_thread_has_valid_turnstileinfo(thread_t thread);
182
183 #if CONFIG_COALITIONS
184 static void stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
185 static void stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
186 #endif /* CONFIG_COALITIONS */
187
188 #if CONFIG_THREAD_GROUPS
189 static void stackshot_thread_group_count(void *arg, int i, struct thread_group *tg);
190 static void stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg);
191 #endif /* CONFIG_THREAD_GROUPS */
192
193 extern uint32_t workqueue_get_pwq_state_kdp(void *proc);
194
195 struct proc;
196 extern int proc_pid(struct proc *p);
197 extern uint64_t proc_uniqueid(void *p);
198 extern uint64_t proc_was_throttled(void *p);
199 extern uint64_t proc_did_throttle(void *p);
200 extern int proc_exiting(void *p);
201 extern int proc_in_teardown(void *p);
202 static uint64_t proc_did_throttle_from_task(task_t task);
203 extern void proc_name_kdp(struct proc *p, char * buf, int size);
204 extern int proc_threadname_kdp(void * uth, char * buf, size_t size);
205 extern void proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
206 extern void proc_archinfo_kdp(void* p, cpu_type_t* cputype, cpu_subtype_t* cpusubtype);
207 extern uint64_t proc_getcsflags_kdp(void * p);
208 extern boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
209 extern int memorystatus_get_pressure_status_kdp(void);
210 extern void memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
211
212 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
213
214 #if CONFIG_TELEMETRY
215 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
216 #endif /* CONFIG_TELEMETRY */
217
218 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
219 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
220
221 static size_t stackshot_plh_est_size(void);
222
223 #if CONFIG_EXCLAVES
224 static kern_return_t collect_exclave_threads(void);
225 #endif
226
227 /*
228 * Validates that the given address for a word is both a valid page and has
229 * default caching attributes for the current map.
230 */
231 bool machine_trace_thread_validate_kva(vm_offset_t);
232 /*
233 * Validates a region that stackshot will potentially inspect.
234 */
235 static bool _stackshot_validate_kva(vm_offset_t, size_t);
236 /*
237 * Must be called whenever stackshot is re-driven.
238 */
239 static void _stackshot_validation_reset(void);
240 /*
241 * A kdp-safe strlen() call. Returns:
242 * -1 if we reach maxlen or a bad address before the end of the string, or
243 * strlen(s)
244 */
245 static long _stackshot_strlen(const char *s, size_t maxlen);
246
247 #define MAX_FRAMES 1000
248 #define MAX_LOADINFOS 500
249 #define MAX_DYLD_COMPACTINFO (20 * 1024) // max bytes of compactinfo to include per proc/shared region
250 #define TASK_IMP_WALK_LIMIT 20
251
252 typedef struct thread_snapshot *thread_snapshot_t;
253 typedef struct task_snapshot *task_snapshot_t;
254
255 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
256 extern kdp_send_t kdp_en_send_pkt;
257 #endif
258
259 /*
260 * Stackshot locking and other defines.
261 */
262 static LCK_GRP_DECLARE(stackshot_subsys_lck_grp, "stackshot_subsys_lock");
263 static LCK_MTX_DECLARE(stackshot_subsys_mutex, &stackshot_subsys_lck_grp);
264
265 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
266 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
267 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
268 #define STACKSHOT_SUBSYS_ASSERT_LOCKED() lck_mtx_assert(&stackshot_subsys_mutex, LCK_MTX_ASSERT_OWNED);
269
270 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
271 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
272
273 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
274 #define GIGABYTES (1024ULL * 1024ULL * 1024ULL)
275
276 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
277
278 /*
279 * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
280 * for non-panic stackshots where faulting is requested.
281 */
282 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
283
284 #define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
285 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
286
287 #ifndef ROUNDUP
288 #define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y))
289 #endif
290
291 #define STACKSHOT_QUEUE_LABEL_MAXSIZE 64
292
293 #define kcd_end_address(kcd) ((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
294 #define kcd_max_address(kcd) ((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
295 /*
296 * Use of the kcd_exit_on_error(action) macro requires a local
297 * 'kern_return_t error' variable and 'error_exit' label.
298 */
299 #define kcd_exit_on_error(action) \
300 do { \
301 if (KERN_SUCCESS != (error = (action))) { \
302 if (error == KERN_RESOURCE_SHORTAGE) { \
303 error = KERN_INSUFFICIENT_BUFFER_SIZE; \
304 } \
305 goto error_exit; \
306 } \
307 } while (0); /* end kcd_exit_on_error */
308
309 /*
310 * Initialize the mutex governing access to the stack snapshot subsystem
311 * and other stackshot related bits.
312 */
313 __private_extern__ void
stackshot_init(void)314 stackshot_init(void)
315 {
316 mach_timebase_info_data_t timebase;
317
318 clock_timebase_info(&timebase);
319 fault_stats.sfs_system_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
320
321 max_tracebuf_size = MAX(max_tracebuf_size, ((ROUNDUP(max_mem, GIGABYTES) / GIGABYTES) * TRACEBUF_SIZE_PER_GB));
322
323 PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
324 }
325
326 /*
327 * Called with interrupts disabled after stackshot context has been
328 * initialized. Updates stack_snapshot_ret.
329 */
/*
 * Trap into the debugger path to take the stackshot. Must be called with
 * interrupts disabled after kdp_snapshot_preflight() has set up the context.
 * Also maintains the subsystem's timing counters around the trap.
 */
static kern_return_t
stackshot_trap(void)
{
	kern_return_t rv;

#if defined(__x86_64__)
	/*
	 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
	 * operation, it's essential to apply mutual exclusion to the other when one
	 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
	 * capture CPUs.
	 *
	 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
	 * allowed, so we check to ensure that there is no rendezvous in progress before
	 * trying to grab the lock (if there is, a deadlock will occur when we try to
	 * grab the lock). This is accomplished by setting cpu_rendezvous_in_progress to
	 * TRUE in the mp rendezvous action function. If stackshot_trap() is called by
	 * a subordinate of the call chain within the mp rendezvous action, this flag will
	 * be set and can be used to detect the inevitable deadlock that would occur
	 * if this thread tried to grab the rendezvous lock.
	 */

	if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
		panic("Calling stackshot from a rendezvous is not allowed!");
	}

	mp_rendezvous_lock();
#endif

	/* Record start before trapping; end stays 0 while a shot is in flight. */
	stackshot_last_abs_start = mach_absolute_time();
	stackshot_last_abs_end = 0;

	rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0);

	/* Update boot-to-date counters for stackshot_get_timing(). */
	stackshot_last_abs_end = mach_absolute_time();
	stackshots_taken++;
	stackshots_duration += (stackshot_last_abs_end - stackshot_last_abs_start);

#if defined(__x86_64__)
	mp_rendezvous_unlock();
#endif
	return rv;
}
373
374 extern void stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration);
375 void
stackshot_get_timing(uint64_t * last_abs_start,uint64_t * last_abs_end,uint64_t * count,uint64_t * total_duration)376 stackshot_get_timing(uint64_t *last_abs_start, uint64_t *last_abs_end, uint64_t *count, uint64_t *total_duration)
377 {
378 STACKSHOT_SUBSYS_LOCK();
379 *last_abs_start = stackshot_last_abs_start;
380 *last_abs_end = stackshot_last_abs_end;
381 *count = stackshots_taken;
382 *total_duration = stackshots_duration;
383 STACKSHOT_SUBSYS_UNLOCK();
384 }
385
/*
 * Finish a stackshot kcdata buffer: flush compression, append the output
 * flags and the end-of-buffer marker, and publish the traced byte counts.
 * Returns KERN_SUCCESS or an error from the kcdata appends (a resource
 * shortage is translated by kcd_exit_on_error to KERN_INSUFFICIENT_BUFFER_SIZE).
 */
static kern_return_t
finalize_kcdata(kcdata_descriptor_t kcdata)
{
	kern_return_t error = KERN_SUCCESS;

	/* Flush any pending compressed data before writing the trailer. */
	kcd_finalize_compression(kcdata);
	kcd_exit_on_error(kcdata_add_uint64_with_description(kcdata, stackshot_out_flags, "stackshot_out_flags"));
	kcd_exit_on_error(kcdata_write_buffer_end(kcdata));
	/* Publish final sizes for kdp_stack_snapshot_bytes_traced()/..._uncompressed(). */
	stack_snapshot_bytes_traced = (uint32_t) kcdata_memory_get_used_bytes(kcdata);
	stack_snapshot_bytes_uncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(kcdata);
	kcdata_finish(kcdata);
error_exit:
	return error;
}
400
401 kern_return_t
stack_snapshot_from_kernel(int pid,void * buf,uint32_t size,uint64_t flags,uint64_t delta_since_timestamp,uint32_t pagetable_mask,unsigned * bytes_traced)402 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint64_t flags, uint64_t delta_since_timestamp, uint32_t pagetable_mask, unsigned *bytes_traced)
403 {
404 kern_return_t error = KERN_SUCCESS;
405 boolean_t istate;
406
407 #if DEVELOPMENT || DEBUG
408 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
409 return KERN_NOT_SUPPORTED;
410 }
411 #endif
412 if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
413 return KERN_INVALID_ARGUMENT;
414 }
415
416 /* cap in individual stackshot to max_tracebuf_size */
417 if (size > max_tracebuf_size) {
418 size = max_tracebuf_size;
419 }
420
421 /* Serialize tracing */
422 if (flags & STACKSHOT_TRYLOCK) {
423 if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
424 return KERN_LOCK_OWNED;
425 }
426 } else {
427 STACKSHOT_SUBSYS_LOCK();
428 }
429
430 #if CONFIG_EXCLAVES
431 assert(!stackshot_exclave_inspect_ctids);
432 #endif
433
434 struct kcdata_descriptor kcdata;
435 uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ?
436 KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;
437
438 error = kcdata_memory_static_init(&kcdata, (mach_vm_address_t)buf, hdr_tag, size,
439 KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
440 if (error) {
441 goto out;
442 }
443
444 stackshot_initial_estimate = 0;
445 stackshot_duration_prior_abs = 0;
446 stackshot_duration_outer = NULL;
447
448 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_START,
449 flags, size, pid, delta_since_timestamp);
450
451 istate = ml_set_interrupts_enabled(FALSE);
452 uint64_t time_start = mach_absolute_time();
453
454 /* Emit a SOCD tracepoint that we are initiating a stackshot */
455 SOCD_TRACE_XNU_START(STACKSHOT);
456
457 /* Preload trace parameters*/
458 kdp_snapshot_preflight(pid, buf, size, flags, &kcdata,
459 delta_since_timestamp, pagetable_mask);
460
461 /*
462 * Trap to the debugger to obtain a coherent stack snapshot; this populates
463 * the trace buffer
464 */
465 error = stackshot_trap();
466
467 uint64_t time_end = mach_absolute_time();
468
469 /* Emit a SOCD tracepoint that we have completed the stackshot */
470 SOCD_TRACE_XNU_END(STACKSHOT);
471
472 ml_set_interrupts_enabled(istate);
473
474 #if CONFIG_EXCLAVES
475 /* stackshot trap should only finish successfully or with no pending Exclave threads */
476 assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
477 if (stackshot_exclave_inspect_ctids && stackshot_exclave_inspect_ctid_count > 0) {
478 error = collect_exclave_threads();
479 }
480 #endif /* CONFIG_EXCLAVES */
481 if (error == KERN_SUCCESS) {
482 error = finalize_kcdata(stackshot_kcdata_p);
483 }
484
485 if (stackshot_duration_outer) {
486 *stackshot_duration_outer = time_end - time_start;
487 }
488 *bytes_traced = kdp_stack_snapshot_bytes_traced();
489
490 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_KERN_RECORD) | DBG_FUNC_END,
491 error, (time_end - time_start), size, *bytes_traced);
492 out:
493 #if CONFIG_EXCLAVES
494 stackshot_exclave_inspect_ctids = NULL;
495 stackshot_exclave_inspect_ctid_capacity = 0;
496 stackshot_exclave_inspect_ctid_count = 0;
497 #endif
498
499 stackshot_kcdata_p = NULL;
500 STACKSHOT_SUBSYS_UNLOCK();
501 return error;
502 }
503
504 #if CONFIG_TELEMETRY
505 kern_return_t
stack_microstackshot(user_addr_t tracebuf,uint32_t tracebuf_size,uint32_t flags,int32_t * retval)506 stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
507 {
508 int error = KERN_SUCCESS;
509 uint32_t bytes_traced = 0;
510
511 *retval = -1;
512
513 /*
514 * Control related operations
515 */
516 if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
517 telemetry_global_ctl(1);
518 *retval = 0;
519 goto exit;
520 } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
521 telemetry_global_ctl(0);
522 *retval = 0;
523 goto exit;
524 }
525
526 /*
527 * Data related operations
528 */
529 *retval = -1;
530
531 if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
532 error = KERN_INVALID_ARGUMENT;
533 goto exit;
534 }
535
536 STACKSHOT_SUBSYS_LOCK();
537
538 if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
539 if (tracebuf_size > max_tracebuf_size) {
540 error = KERN_INVALID_ARGUMENT;
541 goto unlock_exit;
542 }
543
544 bytes_traced = tracebuf_size;
545 error = telemetry_gather(tracebuf, &bytes_traced,
546 (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? true : false);
547 *retval = (int)bytes_traced;
548 goto unlock_exit;
549 }
550
551 unlock_exit:
552 STACKSHOT_SUBSYS_UNLOCK();
553 exit:
554 return error;
555 }
556 #endif /* CONFIG_TELEMETRY */
557
558 /*
559 * Return the estimated size of a stackshot based on the
560 * number of currently running threads and tasks.
561 *
562 * adj is an adjustment in units of percentage
563 *
564 * This function is mostly unhinged from reality; struct thread_snapshot and
565 * struct task_stackshot are legacy, much larger versions of the structures we
566 * actually use, and there's no accounting for how we actually generate
567 * task & thread information. rdar://78880038 intends to replace this all.
568 */
569 uint32_t
get_stackshot_estsize(uint32_t prev_size_hint,uint32_t adj)570 get_stackshot_estsize(uint32_t prev_size_hint, uint32_t adj)
571 {
572 vm_size_t thread_total;
573 vm_size_t task_total;
574 uint64_t size;
575 uint32_t estimated_size;
576 size_t est_thread_size = sizeof(struct thread_snapshot);
577 size_t est_task_size = sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE;
578
579 adj = MIN(adj, 100u); /* no more than double our estimate */
580
581 #if STACKSHOT_COLLECTS_LATENCY_INFO
582 if (collect_latency_info) {
583 est_thread_size += sizeof(struct stackshot_latency_thread);
584 est_task_size += sizeof(struct stackshot_latency_task);
585 }
586 #endif
587
588 thread_total = (threads_count * est_thread_size);
589 task_total = (tasks_count * est_task_size);
590
591 size = thread_total + task_total + STACKSHOT_SUPP_SIZE; /* estimate */
592 size += (size * adj) / 100; /* add adj */
593 size = MAX(size, prev_size_hint); /* allow hint to increase */
594 size += stackshot_plh_est_size(); /* add space for the port label hash */
595 size = MIN(size, VM_MAP_TRUNC_PAGE(UINT32_MAX, PAGE_MASK)); /* avoid overflow */
596 estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(size, PAGE_MASK); /* round to pagesize */
597
598 return estimated_size;
599 }
600
601 /*
602 * stackshot_remap_buffer: Utility function to remap bytes_traced bytes starting at stackshotbuf
603 * into the current task's user space and subsequently copy out the address
604 * at which the buffer has been mapped in user space to out_buffer_addr.
605 *
606 * Inputs: stackshotbuf - pointer to the original buffer in the kernel's address space
607 * bytes_traced - length of the buffer to remap starting from stackshotbuf
608 * out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
609 * out_size_addr - pointer to be filled in with the size of the buffer
610 *
611 * Outputs: ENOSPC if there is not enough free space in the task's address space to remap the buffer
612 * EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
613 * an error from copyout
614 */
static kern_return_t
stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
{
	int error = 0;
	mach_vm_offset_t stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
	vm_prot_t cur_prot, max_prot;

	/* Map the kernel buffer into the calling task at a VM-chosen address. */
	error = mach_vm_remap_kernel(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
	    VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
	/*
	 * If the call to mach_vm_remap fails, we return the appropriate converted error
	 */
	if (error == KERN_SUCCESS) {
		/*
		 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
		 * we just made in the task's user space.
		 */
		error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
		if (error != KERN_SUCCESS) {
			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
			return error;
		}
		/* Report the mapped length alongside the address. */
		error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
		if (error != KERN_SUCCESS) {
			mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
			return error;
		}
	}
	return error;
}
645
646 #if CONFIG_EXCLAVES
647
648 static kern_return_t
stackshot_setup_exclave_waitlist(kcdata_descriptor_t kcdata)649 stackshot_setup_exclave_waitlist(kcdata_descriptor_t kcdata)
650 {
651 kern_return_t error = KERN_SUCCESS;
652 size_t exclave_threads_max = exclaves_ipc_buffer_count();
653 size_t waitlist_size = 0;
654
655 assert(!stackshot_exclave_inspect_ctids);
656
657 if (exclaves_inspection_is_initialized() && exclave_threads_max) {
658 if (os_mul_overflow(exclave_threads_max, sizeof(ctid_t), &waitlist_size)) {
659 error = KERN_INVALID_ARGUMENT;
660 goto error;
661 }
662 stackshot_exclave_inspect_ctids = kcdata_endalloc(kcdata, waitlist_size);
663 if (!stackshot_exclave_inspect_ctids) {
664 error = KERN_RESOURCE_SHORTAGE;
665 goto error;
666 }
667 stackshot_exclave_inspect_ctid_count = 0;
668 stackshot_exclave_inspect_ctid_capacity = exclave_threads_max;
669 }
670
671 error:
672 return error;
673 }
674
/*
 * After the stackshot trap, hand the recorded exclave ctids to the exclave
 * inspection machinery, wait for collection to finish, and close the
 * EXCLAVES kcdata container. Called with the stackshot subsystem lock held.
 */
static kern_return_t
collect_exclave_threads(void)
{
	size_t i;
	ctid_t ctid;
	thread_t thread;
	kern_return_t kr;
	STACKSHOT_SUBSYS_ASSERT_LOCKED();

	lck_mtx_lock(&exclaves_collect_mtx);
	/* This error is intentionally ignored: we are now committed to collecting
	 * these threads, or at least properly waking them. If this fails, the first
	 * collected thread should also fail to append to the kcdata, and will abort
	 * further collection, properly clearing the AST and waking these threads.
	 */
	kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);

	/* Queue every recorded ctid for the exclave inspection thread. */
	for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
		ctid = stackshot_exclave_inspect_ctids[i];
		thread = ctid_get_thread(ctid);
		assert(thread);
		exclaves_inspection_queue_add(&exclaves_inspection_queue_stackshot, &thread->th_exclaves_inspection_queue_stackshot);
	}
	/* Kick off collection and block until the queue has been drained. */
	exclaves_inspection_begin_collecting();
	exclaves_inspection_wait_complete(&exclaves_inspection_queue_stackshot);
	kr = stackshot_exclave_kr; /* Read the result of work done on our behalf, by collection thread */
	if (kr != KERN_SUCCESS) {
		goto out;
	}

	kr = kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
	    STACKSHOT_KCCONTAINER_EXCLAVES, 0);
	if (kr != KERN_SUCCESS) {
		goto out;
	}
out:
	lck_mtx_unlock(&exclaves_collect_mtx);
	return kr;
}
715
/*
 * Serialize one optional exclave stacktrace into the kcdata buffer as an
 * array of frame addresses. Succeeds trivially when the trace is absent.
 */
static kern_return_t
stackshot_exclaves_process_stacktrace(const address_v__opt_s *_Nonnull st, void *kcdata_ptr)
{
	kern_return_t error = KERN_SUCCESS;
	exclave_ecstackentry_addr_t * addr = NULL;
	__block size_t count = 0;

	if (!st->has_value) {
		goto error_exit;
	}

	/* First pass: count the frames so the kcdata array can be sized. */
	address__v_visit(&st->value, ^(size_t __unused i, const stackshot_address_s __unused item) {
		count++;
	});

	/* NOTE(review): the window-open result is ignored here; a failure would
	 * surface from the array allocation below — confirm that is intended. */
	kcdata_compression_window_open(kcdata_ptr);
	kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_ECSTACK,
	    sizeof(exclave_ecstackentry_addr_t), count, (mach_vm_address_t*)&addr));

	/* Second pass: copy each frame address into the reserved array. */
	address__v_visit(&st->value, ^(size_t i, const stackshot_address_s item) {
		addr[i] = (exclave_ecstackentry_addr_t)item;
	});

	kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));

error_exit:
	return error;
}
744
745 static kern_return_t
stackshot_exclaves_process_ipcstackentry(uint64_t index,const stackshot_ipcstackentry_s * _Nonnull ise,void * kcdata_ptr)746 stackshot_exclaves_process_ipcstackentry(uint64_t index, const stackshot_ipcstackentry_s *_Nonnull ise, void *kcdata_ptr)
747 {
748 kern_return_t error = KERN_SUCCESS;
749
750 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
751 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
752
753 struct exclave_ipcstackentry_info info = { 0 };
754 info.eise_asid = ise->asid;
755
756 info.eise_tnid = ise->tnid;
757
758 if (ise->invocationid.has_value) {
759 info.eise_flags |= kExclaveIpcStackEntryHaveInvocationID;
760 info.eise_invocationid = ise->invocationid.value;
761 } else {
762 info.eise_invocationid = 0;
763 }
764
765 info.eise_flags |= (ise->stacktrace.has_value ? kExclaveIpcStackEntryHaveStack : 0);
766
767 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_IPCSTACKENTRY_INFO, sizeof(struct exclave_ipcstackentry_info), &info));
768
769 if (ise->stacktrace.has_value) {
770 kcd_exit_on_error(stackshot_exclaves_process_stacktrace(&ise->stacktrace, kcdata_ptr));
771 }
772
773 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
774 STACKSHOT_KCCONTAINER_EXCLAVE_IPCSTACKENTRY, index));
775
776 error_exit:
777 return error;
778 }
779
/*
 * Walk an optional vector of IPC stack entries, serializing each in order.
 * `kr` is __block-captured so the visit block can short-circuit: once any
 * entry fails, the remaining iterations become no-ops and the first error
 * is returned.
 */
static kern_return_t
stackshot_exclaves_process_ipcstack(const stackshot_ipcstackentry_v__opt_s *_Nonnull ipcstack, void *kcdata_ptr)
{
	__block kern_return_t kr = KERN_SUCCESS;

	/* Absent vector: nothing to emit, report success. */
	if (!ipcstack->has_value) {
		goto error_exit;
	}

	stackshot_ipcstackentry__v_visit(&ipcstack->value, ^(size_t i, const stackshot_ipcstackentry_s *_Nonnull item) {
		if (kr == KERN_SUCCESS) {
			kr = stackshot_exclaves_process_ipcstackentry(i, item, kcdata_ptr);
		}
	});

error_exit:
	return kr;
}
798
/*
 * Emit one exclave stackshot entry into kcdata as a SCRESULT container keyed
 * by the entry's scid, followed (when present) by its IPC stack.
 * Returns KERN_SUCCESS or the first kcdata/processing error.
 */
static kern_return_t
stackshot_exclaves_process_stackshotentry(const stackshot_stackshotentry_s *_Nonnull se, void *kcdata_ptr)
{
	kern_return_t error = KERN_SUCCESS;

	/* kcd_exit_on_error() jumps to error_exit on failure, leaving `error` set. */
	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));

	struct exclave_scresult_info info = { 0 };
	info.esc_id = se->scid;
	/* Flag tells consumers whether an IPC-stack payload follows in this container. */
	info.esc_flags = se->ipcstack.has_value ? kExclaveScresultHaveIPCStack : 0;

	kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_SCRESULT_INFO, sizeof(struct exclave_scresult_info), &info));

	if (se->ipcstack.has_value) {
		kcd_exit_on_error(stackshot_exclaves_process_ipcstack(&se->ipcstack, kcdata_ptr));
	}

	kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
	    STACKSHOT_KCCONTAINER_EXCLAVE_SCRESULT, se->scid));

error_exit:
	return error;
}
823
824 static kern_return_t
stackshot_exclaves_process_textlayout_segments(const stackshot_textlayout_s * _Nonnull tl,void * kcdata_ptr)825 stackshot_exclaves_process_textlayout_segments(const stackshot_textlayout_s *_Nonnull tl, void *kcdata_ptr)
826 {
827 kern_return_t error = KERN_SUCCESS;
828 __block struct exclave_textlayout_segment * info = NULL;
829
830 __block size_t count = 0;
831 stackshot_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshot_textsegment_s __unused *_Nonnull item) {
832 count++;
833 });
834
835 if (!count) {
836 goto error_exit;
837 }
838
839 kcdata_compression_window_open(kcdata_ptr);
840 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_SEGMENTS,
841 sizeof(struct exclave_textlayout_segment), count, (mach_vm_address_t*)&info));
842
843 stackshot_textsegment__v_visit(&tl->textsegments, ^(size_t __unused i, const stackshot_textsegment_s *_Nonnull item) {
844 memcpy(&info->layoutSegment_uuid, item->uuid, sizeof(uuid_t));
845 info->layoutSegment_loadAddress = item->loadaddress;
846 info++;
847 });
848
849 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
850
851 error_exit:
852 return error;
853 }
854
855 static kern_return_t
stackshot_exclaves_process_textlayout(uint64_t index,const stackshot_textlayout_s * _Nonnull tl,void * kcdata_ptr)856 stackshot_exclaves_process_textlayout(uint64_t index, const stackshot_textlayout_s *_Nonnull tl, void *kcdata_ptr)
857 {
858 kern_return_t error = KERN_SUCCESS;
859 __block struct exclave_textlayout_info info = { 0 };
860
861 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
862 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
863
864 info.layout_id = tl->textlayoutid;
865
866 info.etl_flags = kExclaveTextLayoutLoadAddressesUnslid;
867
868 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_TEXTLAYOUT_INFO, sizeof(struct exclave_textlayout_info), &info));
869 kcd_exit_on_error(stackshot_exclaves_process_textlayout_segments(tl, kcdata_ptr));
870 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
871 STACKSHOT_KCCONTAINER_EXCLAVE_TEXTLAYOUT, index));
872 error_exit:
873 return error;
874 }
875
876 static kern_return_t
stackshot_exclaves_process_addressspace(const stackshot_addressspace_s * _Nonnull as,void * kcdata_ptr)877 stackshot_exclaves_process_addressspace(const stackshot_addressspace_s *_Nonnull as, void *kcdata_ptr)
878 {
879 kern_return_t error = KERN_SUCCESS;
880 struct exclave_addressspace_info info = { 0 };
881 __block size_t name_len = 0;
882 uint8_t * name = NULL;
883
884 u8__v_visit(&as->name, ^(size_t __unused i, const uint8_t __unused item) {
885 name_len++;
886 });
887
888 info.eas_id = as->asid;
889
890 if (as->rawaddressslide.has_value) {
891 info.eas_flags = kExclaveAddressSpaceHaveSlide;
892 info.eas_slide = as->rawaddressslide.value;
893 } else {
894 info.eas_flags = 0;
895 info.eas_slide = UINT64_MAX;
896 }
897
898 info.eas_layoutid = as->textlayoutid; // text layout for this address space
899
900 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
901 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
902 kcd_exit_on_error(kcdata_push_data(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_INFO, sizeof(struct exclave_addressspace_info), &info));
903
904 if (name_len > 0) {
905 kcdata_compression_window_open(kcdata_ptr);
906 kcd_exit_on_error(kcdata_get_memory_addr(kcdata_ptr, STACKSHOT_KCTYPE_EXCLAVE_ADDRESSSPACE_NAME, name_len + 1, (mach_vm_address_t*)&name));
907
908 u8__v_visit(&as->name, ^(size_t i, const uint8_t item) {
909 name[i] = item;
910 });
911 name[name_len] = 0;
912
913 kcd_exit_on_error(kcdata_compression_window_close(kcdata_ptr));
914 }
915
916 kcd_exit_on_error(kcdata_add_container_marker(kcdata_ptr, KCDATA_TYPE_CONTAINER_END,
917 STACKSHOT_KCCONTAINER_EXCLAVE_ADDRESSSPACE, as->asid));
918 error_exit:
919 return error;
920 }
921
922 kern_return_t
923 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr);
924
925 kern_return_t
stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s * result,void * kcdata_ptr)926 stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *result, void *kcdata_ptr)
927 {
928 __block kern_return_t kr = KERN_SUCCESS;
929
930 stackshot_stackshotentry__v_visit(&result->stackshotentries, ^(size_t __unused i, const stackshot_stackshotentry_s *_Nonnull item) {
931 if (kr == KERN_SUCCESS) {
932 kr = stackshot_exclaves_process_stackshotentry(item, kcdata_ptr);
933 }
934 });
935
936 stackshot_addressspace__v_visit(&result->addressspaces, ^(size_t __unused i, const stackshot_addressspace_s *_Nonnull item) {
937 if (kr == KERN_SUCCESS) {
938 kr = stackshot_exclaves_process_addressspace(item, kcdata_ptr);
939 }
940 });
941
942 stackshot_textlayout__v_visit(&result->textlayouts, ^(size_t i, const stackshot_textlayout_s *_Nonnull item) {
943 if (kr == KERN_SUCCESS) {
944 kr = stackshot_exclaves_process_textlayout(i, item, kcdata_ptr);
945 }
946 });
947
948 return kr;
949 }
950
951 kern_return_t
952 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result);
953
954 kern_return_t
stackshot_exclaves_process_result(kern_return_t collect_kr,const stackshot_stackshotresult_s * result)955 stackshot_exclaves_process_result(kern_return_t collect_kr, const stackshot_stackshotresult_s *result)
956 {
957 kern_return_t kr = KERN_SUCCESS;
958 if (result == NULL) {
959 return collect_kr;
960 }
961
962 kr = stackshot_exclaves_process_stackshot(result, stackshot_kcdata_p);
963
964 stackshot_exclave_kr = kr;
965
966 return kr;
967 }
968
969
/*
 * Flag every queued exclave-inspection thread for stackshot inspection.
 * Takes a reference on each thread and sets TH_EXCLAVES_INSPECTION_STACKSHOT
 * in its inspection state.  Must run in debugger context.
 */
static void
commit_exclaves_ast(void)
{
	size_t i = 0;
	thread_t thread = NULL;

	assert(debug_mode_active());

	if (stackshot_exclave_inspect_ctids && stackshot_exclave_inspect_ctid_count > 0) {
		for (i = 0; i < stackshot_exclave_inspect_ctid_count; ++i) {
			/*
			 * NOTE(review): ctid_get_thread() is not NULL-checked before
			 * thread_reference() — presumably the ctids are guaranteed live
			 * while queued; confirm against the enqueueing path.
			 */
			thread = ctid_get_thread(stackshot_exclave_inspect_ctids[i]);
			thread_reference(thread);
			os_atomic_or(&thread->th_exclaves_inspection_state, TH_EXCLAVES_INSPECTION_STACKSHOT, relaxed);
		}
	}
}
986
987 #endif /* CONFIG_EXCLAVES */
988
/*
 * Take a stackshot on behalf of user space or the kernel/KEXT API.
 *
 * Parses the versioned config, validates the flag combinations allowed for
 * the caller, then under the stackshot subsystem lock: sizes a kernel buffer
 * (doubling it on KERN_INSUFFICIENT_BUFFER_SIZE), traps to debugger context
 * to capture the data with interrupts disabled, optionally collects exclave
 * thread data afterwards, and finally either remaps the result into the
 * caller's address space or parks it in the kernel buffer
 * (STACKSHOT_SAVE_IN_KERNEL_BUFFER).
 *
 * Returns KERN_SUCCESS or a kern_return_t describing the first failure.
 */
kern_return_t
kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
{
	int error = 0;
	boolean_t prev_interrupt_state;
	uint32_t bytes_traced = 0;
	uint32_t stackshot_estimate = 0;
	uint32_t stackshotbuf_size = 0;
	void * stackshotbuf = NULL;
	kcdata_descriptor_t kcdata_p = NULL;

	void * buf_to_free = NULL;
	int size_to_free = 0;
	bool is_traced = false; /* has FUNC_START tracepoint fired? */
	uint64_t tot_interrupts_off_abs = 0; /* sum(time with interrupts off) */

	/* Parsed arguments */
	uint64_t out_buffer_addr;
	uint64_t out_size_addr;
	int pid = -1;
	uint64_t flags;
	uint64_t since_timestamp;
	uint32_t size_hint = 0;
	uint32_t pagetable_mask = STACKSHOT_PAGETABLES_MASK_ALL;

	if (stackshot_config == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
#if DEVELOPMENT || DEBUG
	/* TBD: ask stackshot clients to avoid issuing stackshots in this
	 * configuration in lieu of the kernel feature override.
	 */
	if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
		return KERN_NOT_SUPPORTED;
	}
#endif

	/* Unpack the caller-supplied config; only one version is understood today. */
	switch (stackshot_config_version) {
	case STACKSHOT_CONFIG_TYPE:
		if (stackshot_config_size != sizeof(stackshot_config_t)) {
			return KERN_INVALID_ARGUMENT;
		}
		stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
		out_buffer_addr = config->sc_out_buffer_addr;
		out_size_addr = config->sc_out_size_addr;
		pid = config->sc_pid;
		flags = config->sc_flags;
		since_timestamp = config->sc_delta_timestamp;
		/* Oversized hints are ignored rather than rejected. */
		if (config->sc_size <= max_tracebuf_size) {
			size_hint = config->sc_size;
		}
		/*
		 * Retain the pre-sc_pagetable_mask behavior of STACKSHOT_PAGE_TABLES,
		 * dump every level if the pagetable_mask is not set
		 */
		if (flags & STACKSHOT_PAGE_TABLES && config->sc_pagetable_mask) {
			pagetable_mask = config->sc_pagetable_mask;
		}
		break;
	default:
		return KERN_NOT_SUPPORTED;
	}

	/*
	 * Currently saving a kernel buffer and trylock are only supported from the
	 * internal/KEXT API.
	 */
	if (stackshot_from_user) {
		if (flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
			return KERN_NO_ACCESS;
		}
#if !DEVELOPMENT && !DEBUG
		/* Compressed stackshots from user space are a development-only feature. */
		if (flags & (STACKSHOT_DO_COMPRESS)) {
			return KERN_NO_ACCESS;
		}
#endif
	} else {
		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
			return KERN_NOT_SUPPORTED;
		}
	}

	if (!((flags & STACKSHOT_KCDATA_FORMAT) || (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
		return KERN_NOT_SUPPORTED;
	}

	/* Compressed delta stackshots or page dumps are not yet supported */
	if (((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) || (flags & STACKSHOT_PAGE_TABLES))
	    && (flags & STACKSHOT_DO_COMPRESS)) {
		return KERN_NOT_SUPPORTED;
	}

	/*
	 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
	 */
	if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* A delta timestamp only makes sense for a delta snapshot. */
	if (since_timestamp != 0 && ((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
		return KERN_INVALID_ARGUMENT;
	}

#if CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS
	/* Drop instruction/cycle collection when the hardware can't provide it. */
	if (!mt_core_supported) {
		flags &= ~STACKSHOT_INSTRS_CYCLES;
	}
#else /* CONFIG_PERVASIVE_CPI && CONFIG_CPU_COUNTERS */
	flags &= ~STACKSHOT_INSTRS_CYCLES;
#endif /* !CONFIG_PERVASIVE_CPI || !CONFIG_CPU_COUNTERS */

	STACKSHOT_TESTPOINT(TP_WAIT_START_STACKSHOT);
	STACKSHOT_SUBSYS_LOCK();

	if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
		/*
		 * Don't overwrite an existing stackshot
		 */
		if (kernel_stackshot_buf != NULL) {
			error = KERN_MEMORY_PRESENT;
			goto error_early_exit;
		}
	} else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
		if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
			error = KERN_NOT_IN_SET;
			goto error_early_exit;
		}
		error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
		    out_buffer_addr, out_size_addr);
		/*
		 * If we successfully remapped the buffer into the user's address space, we
		 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
		 * and then clear the kernel stackshot pointer and associated size.
		 */
		if (error == KERN_SUCCESS) {
			buf_to_free = kernel_stackshot_buf;
			size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
			kernel_stackshot_buf = NULL;
			kernel_stackshot_buf_size = 0;
		}

		goto error_early_exit;
	}

	if (flags & STACKSHOT_GET_BOOT_PROFILE) {
		void *bootprofile = NULL;
		uint32_t len = 0;
#if CONFIG_TELEMETRY
		bootprofile_get(&bootprofile, &len);
#endif
		if (!bootprofile || !len) {
			error = KERN_NOT_IN_SET;
			goto error_early_exit;
		}
		error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
		goto error_early_exit;
	}

	/* Produce the initial buffer-size estimate (adjusted by prior shortfalls). */
	stackshot_duration_prior_abs = 0;
	stackshot_initial_estimate_adj = os_atomic_load(&stackshot_estimate_adj, relaxed);
	stackshotbuf_size = stackshot_estimate =
	    get_stackshot_estsize(size_hint, stackshot_initial_estimate_adj);
	stackshot_initial_estimate = stackshot_estimate;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_START,
	    flags, stackshotbuf_size, pid, since_timestamp);
	is_traced = true;

#if CONFIG_EXCLAVES
	assert(!stackshot_exclave_inspect_ctids);
#endif

	/* Retry loop: double the buffer each time the snapshot doesn't fit. */
	for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size,
		    KMA_ZERO | KMA_DATA, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
			error = KERN_RESOURCE_SHORTAGE;
			goto error_exit;
		}


		uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
		    : (flags & STACKSHOT_DO_COMPRESS) ? KCDATA_BUFFER_BEGIN_COMPRESSED
		    : KCDATA_BUFFER_BEGIN_STACKSHOT;
		/*
		 * NOTE(review): kcdata_memory_alloc_init()'s result is not
		 * NULL-checked before use below — confirm it cannot fail for an
		 * in-range, freshly allocated buffer.
		 */
		kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, hdr_tag, stackshotbuf_size,
		    KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);

		stackshot_duration_outer = NULL;

		/* if compression was requested, allocate the extra zlib scratch area */
		if (flags & STACKSHOT_DO_COMPRESS) {
			hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT
			    : KCDATA_BUFFER_BEGIN_STACKSHOT;
			error = kcdata_init_compress(kcdata_p, hdr_tag, kdp_memcpy, KCDCT_ZLIB);
			if (error != KERN_SUCCESS) {
				os_log(OS_LOG_DEFAULT, "failed to initialize compression: %d!\n",
				    (int) error);
				goto error_exit;
			}
		}

		/*
		 * Disable interrupts and save the current interrupt state.
		 */
		prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
		uint64_t time_start = mach_absolute_time();

		/* Emit a SOCD tracepoint that we are initiating a stackshot */
		SOCD_TRACE_XNU_START(STACKSHOT);

		/*
		 * Load stackshot parameters.
		 */
		kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, since_timestamp,
		    pagetable_mask);

		/* The actual capture runs in debugger context via this trap. */
		error = stackshot_trap();

		/* record the duration that interrupts were disabled */
		uint64_t time_end = mach_absolute_time();

		/* Emit a SOCD tracepoint that we have completed the stackshot */
		SOCD_TRACE_XNU_END(STACKSHOT);
		ml_set_interrupts_enabled(prev_interrupt_state);

#if CONFIG_EXCLAVES
		/* trigger Exclave thread collection if any are queued */
		assert(error == KERN_SUCCESS || stackshot_exclave_inspect_ctids == NULL);
		if (stackshot_exclave_inspect_ctids && stackshot_exclave_inspect_ctid_count > 0) {
			STACKSHOT_TESTPOINT(TP_START_COLLECTION);
			error = collect_exclave_threads();
		}
#endif /* CONFIG_EXCLAVES */

		if (stackshot_duration_outer) {
			*stackshot_duration_outer = time_end - time_start;
		}
		tot_interrupts_off_abs += time_end - time_start;

		if (error != KERN_SUCCESS) {
			/* Tear down this attempt's descriptor and buffer before deciding whether to retry. */
			if (kcdata_p != NULL) {
				kcdata_memory_destroy(kcdata_p);
				kcdata_p = NULL;
				stackshot_kcdata_p = NULL;
			}
			kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
			stackshotbuf = NULL;
			if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
				/*
				 * If we didn't allocate a big enough buffer, deallocate and try again.
				 */
				KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD_SHORT) | DBG_FUNC_NONE,
				    time_end - time_start, stackshot_estimate, stackshotbuf_size);
				stackshot_duration_prior_abs += (time_end - time_start);
				continue;
			} else {
				goto error_exit;
			}
		}

		kcd_exit_on_error(finalize_kcdata(stackshot_kcdata_p));

		bytes_traced = kdp_stack_snapshot_bytes_traced();
		/* bytes_traced is unsigned, so this is effectively an == 0 check. */
		if (bytes_traced <= 0) {
			error = KERN_ABORTED;
			goto error_exit;
		}

		assert(bytes_traced <= stackshotbuf_size);
		if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
			error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
			goto error_exit;
		}

		/*
		 * Save the stackshot in the kernel buffer.
		 */
		kernel_stackshot_buf = stackshotbuf;
		kernel_stackshot_buf_size = bytes_traced;
		/*
		 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
		 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
		 * update size_to_free for kmem_free accordingly.
		 */
		size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);

		assert(size_to_free >= 0);

		if (size_to_free != 0) {
			buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
		}

		stackshotbuf = NULL;
		stackshotbuf_size = 0;
		goto error_exit;
	}

	/* Fell out of the retry loop: even the maximum buffer wasn't enough. */
	if (stackshotbuf_size > max_tracebuf_size) {
		error = KERN_RESOURCE_SHORTAGE;
	}

error_exit:
	if (is_traced) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_STACKSHOT, STACKSHOT_RECORD) | DBG_FUNC_END,
		    error, tot_interrupts_off_abs, stackshotbuf_size, bytes_traced);
	}

#if CONFIG_EXCLAVES
	/* The ctid list lives in the kcdata buffer, so just drop the references. */
	stackshot_exclave_inspect_ctids = NULL;
	stackshot_exclave_inspect_ctid_capacity = 0;
	stackshot_exclave_inspect_ctid_count = 0;
#endif

error_early_exit:
	if (kcdata_p != NULL) {
		kcdata_memory_destroy(kcdata_p);
		kcdata_p = NULL;
		stackshot_kcdata_p = NULL;
	}

	if (stackshotbuf != NULL) {
		kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
	}
	if (buf_to_free != NULL) {
		kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
	}

	STACKSHOT_SUBSYS_UNLOCK();
	STACKSHOT_TESTPOINT(TP_STACKSHOT_DONE);

	return error;
}
1320
1321 /*
1322 * Cache stack snapshot parameters in preparation for a trace.
1323 */
void
kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint64_t flags,
    kcdata_descriptor_t data_p, uint64_t since_timestamp, uint32_t pagetable_mask)
{
	/* Record the wall-clock time of this snapshot (microseconds since the epoch). */
	uint64_t microsecs = 0, secs = 0;
	clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);

	stackshot_microsecs = microsecs + (secs * USEC_PER_SEC);
	/* Stash the parameters in globals read later from debugger context. */
	stack_snapshot_pid = pid;
	stack_snapshot_buf = tracebuf;
	stack_snapshot_bufsize = tracebuf_size;
	stack_snapshot_flags = flags;
	stack_snapshot_delta_since_timestamp = since_timestamp;
	stack_snapshot_pagetable_mask = pagetable_mask;

	panic_stackshot = ((flags & STACKSHOT_FROM_PANIC) != 0);

	/* Installing the descriptor marks the stackshot as active (see stackshot_active()). */
	assert(data_p != NULL);
	assert(stackshot_kcdata_p == NULL);
	stackshot_kcdata_p = data_p;

	stack_snapshot_bytes_traced = 0;
	stack_snapshot_bytes_uncompressed = 0;
}
1348
void
panic_stackshot_reset_state(void)
{
	/* Drop any in-flight descriptor so a panic-time stackshot can start clean. */
	stackshot_kcdata_p = NULL;
}
1354
boolean_t
stackshot_active(void)
{
	/* A stackshot is in progress whenever a kcdata descriptor is installed. */
	return stackshot_kcdata_p != NULL;
}
1360
uint32_t
kdp_stack_snapshot_bytes_traced(void)
{
	/* Bytes written by the most recent stackshot (post-compression, if any). */
	return stack_snapshot_bytes_traced;
}
1366
uint32_t
kdp_stack_snapshot_bytes_uncompressed(void)
{
	/* Pre-compression byte count of the most recent stackshot's data. */
	return stack_snapshot_bytes_uncompressed;
}
1372
1373 static boolean_t
memory_iszero(void * addr,size_t size)1374 memory_iszero(void *addr, size_t size)
1375 {
1376 char *data = (char *)addr;
1377 for (size_t i = 0; i < size; i++) {
1378 if (data[i] != 0) {
1379 return FALSE;
1380 }
1381 }
1382 return TRUE;
1383 }
1384
1385 /*
1386 * Keep a simple cache of the most recent validation done at a page granularity
1387 * to avoid the expensive software KVA-to-phys translation in the VM.
1388 */
1389
struct _stackshot_validation_state {
	vm_offset_t last_valid_page_kva;  /* page number (atop_kernel) of the last validated KVA; -1 = none */
	size_t last_valid_size;           /* byte count validated at that page */
} g_validation_state;
1394
static void
_stackshot_validation_reset(void)
{
	/* (vm_offset_t)-1 is an impossible page number, so the next lookup always misses. */
	g_validation_state.last_valid_page_kva = -1;
	g_validation_state.last_valid_size = 0;
}
1401
1402 static bool
_stackshot_validate_kva(vm_offset_t addr,size_t size)1403 _stackshot_validate_kva(vm_offset_t addr, size_t size)
1404 {
1405 vm_offset_t page_addr = atop_kernel(addr);
1406 if (g_validation_state.last_valid_page_kva == page_addr &&
1407 g_validation_state.last_valid_size <= size) {
1408 return true;
1409 }
1410
1411 if (ml_validate_nofault(addr, size)) {
1412 g_validation_state.last_valid_page_kva = page_addr;
1413 g_validation_state.last_valid_size = size;
1414 return true;
1415 }
1416 return false;
1417 }
1418
1419 static long
_stackshot_strlen(const char * s,size_t maxlen)1420 _stackshot_strlen(const char *s, size_t maxlen)
1421 {
1422 size_t len = 0;
1423 for (len = 0; _stackshot_validate_kva((vm_offset_t)s, 1); len++, s++) {
1424 if (*s == 0) {
1425 return len;
1426 }
1427 if (len >= maxlen) {
1428 return -1;
1429 }
1430 }
1431 return -1; /* failed before end of string */
1432 }
1433
1434 /*
1435 * For port labels, we have a small hash table we use to track the
1436 * struct ipc_service_port_label pointers we see along the way.
1437 * This structure encapsulates the global state.
1438 *
1439 * The hash table is insert-only, similar to "intern"ing strings. It's
 * only used and manipulated during the stackshot collection. We use
 * separate chaining, with the hash elements and chains being int16_t
1442 * indexes into the parallel arrays, with -1 ending the chain. Array indices are
1443 * allocated using a bump allocator.
1444 *
1445 * The parallel arrays contain:
1446 * - plh_array[idx] the pointer entered
1447 * - plh_chains[idx] the hash chain
1448 * - plh_gen[idx] the last 'generation #' seen
1449 *
1450 * Generation IDs are used to track entries looked up in the current
1451 * task; 0 is never used, and the plh_gen array is cleared to 0 on
1452 * rollover.
1453 *
1454 * The portlabel_ids we report externally are just the index in the array,
1455 * plus 1 to avoid 0 as a value. 0 is NONE, -1 is UNKNOWN (e.g. there is
1456 * one, but we ran out of space)
1457 */
struct port_label_hash {
	uint16_t plh_size; /* size of allocations; 0 disables tracking */
	uint16_t plh_count; /* count of used entries in plh_array */
	struct ipc_service_port_label **plh_array; /* _size allocated, _count used */
	int16_t *plh_chains; /* _size allocated */
	uint8_t *plh_gen; /* last 'gen #' seen in */
	int16_t *plh_hash; /* (1 << STACKSHOT_PLH_SHIFT) entry hash table: hash(ptr) -> array index */
	int16_t plh_curgen_min; /* min idx seen for this gen */
	int16_t plh_curgen_max; /* max idx seen for this gen */
	uint8_t plh_curgen; /* current gen */
#if DEVELOPMENT || DEBUG
	/* statistics */
	uint32_t plh_lookups; /* # lookups or inserts */
	uint32_t plh_found;
	uint32_t plh_found_depth;
	uint32_t plh_insert;
	uint32_t plh_insert_depth;
	uint32_t plh_bad;
	uint32_t plh_bad_depth;
	uint32_t plh_lookup_send;
	uint32_t plh_lookup_receive;
/* PLH_STAT_OP evaluates its args only on DEVELOPMENT/DEBUG builds; elsewhere they compile out. */
#define PLH_STAT_OP(...) (void)(__VA_ARGS__)
#else /* DEVELOPMENT || DEBUG */
#define PLH_STAT_OP(...) (void)(0)
#endif /* DEVELOPMENT || DEBUG */
} port_label_hash;
1484
1485 #define STACKSHOT_PLH_SHIFT 7
1486 #define STACKSHOT_PLH_SIZE_MAX ((kdp_ipc_have_splabel)? 1024 : 0)
1487 size_t stackshot_port_label_size = (2 * (1u << STACKSHOT_PLH_SHIFT));
1488 #define STASKSHOT_PLH_SIZE(x) MIN((x), STACKSHOT_PLH_SIZE_MAX)
1489
1490 static size_t
stackshot_plh_est_size(void)1491 stackshot_plh_est_size(void)
1492 {
1493 struct port_label_hash *plh = &port_label_hash;
1494 size_t size = STASKSHOT_PLH_SIZE(stackshot_port_label_size);
1495
1496 if (size == 0) {
1497 return 0;
1498 }
1499 #define SIZE_EST(x) ROUNDUP((x), sizeof (uintptr_t))
1500 return SIZE_EST(size * sizeof(*plh->plh_array)) +
1501 SIZE_EST(size * sizeof(*plh->plh_chains)) +
1502 SIZE_EST(size * sizeof(*plh->plh_gen)) +
1503 SIZE_EST((1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh->plh_hash));
1504 #undef SIZE_EST
1505 }
1506
static void
stackshot_plh_reset(void)
{
	/* Clear the whole hash; plh_size == 0 disables tracking until setup runs again. */
	port_label_hash = (struct port_label_hash){.plh_size = 0}; /* structure assignment */
}
1512
/*
 * Initialize the port-label hash for a new stackshot, carving the parallel
 * arrays out of the end of the kcdata buffer.  On any allocation failure the
 * global stays reset (plh_size == 0), which disables tracking.
 */
static void
stackshot_plh_setup(kcdata_descriptor_t data)
{
	struct port_label_hash plh = {
		.plh_size = STASKSHOT_PLH_SIZE(stackshot_port_label_size),
		.plh_count = 0,
		.plh_curgen = 1,
		.plh_curgen_min = STACKSHOT_PLH_SIZE_MAX,
		.plh_curgen_max = 0,
	};
	stackshot_plh_reset();
	size_t size = plh.plh_size;
	if (size == 0) {
		return;
	}
	plh.plh_array = kcdata_endalloc(data, size * sizeof(*plh.plh_array));
	plh.plh_chains = kcdata_endalloc(data, size * sizeof(*plh.plh_chains));
	plh.plh_gen = kcdata_endalloc(data, size * sizeof(*plh.plh_gen));
	plh.plh_hash = kcdata_endalloc(data, (1ul << STACKSHOT_PLH_SHIFT) * sizeof(*plh.plh_hash));
	if (plh.plh_array == NULL || plh.plh_chains == NULL || plh.plh_gen == NULL || plh.plh_hash == NULL) {
		PLH_STAT_OP(port_label_hash.plh_bad++);
		return;
	}
	/* Empty chains/buckets are marked with -1; generation 0 means "never stamped". */
	for (int x = 0; x < size; x++) {
		plh.plh_array[x] = NULL;
		plh.plh_chains[x] = -1;
		plh.plh_gen[x] = 0;
	}
	for (int x = 0; x < (1ul << STACKSHOT_PLH_SHIFT); x++) {
		plh.plh_hash[x] = -1;
	}
	port_label_hash = plh; /* structure assignment */
}
1546
/*
 * Hash a service-port-label pointer down to a bucket index in
 * [0, 1 << STACKSHOT_PLH_SHIFT) by XOR-folding the pointer bits.
 */
static int16_t
stackshot_plh_hash(struct ipc_service_port_label *ispl)
{
	uintptr_t ptr = (uintptr_t)ispl;
	static_assert(STACKSHOT_PLH_SHIFT < 16, "plh_hash must fit in 15 bits");
	/* Each step is guarded so shifts wider than the pointer are compile-time zero. */
#define PLH_HASH_STEP(ptr, x) \
	((((x) * STACKSHOT_PLH_SHIFT) < (sizeof(ispl) * CHAR_BIT)) ? ((ptr) >> ((x) * STACKSHOT_PLH_SHIFT)) : 0)
	ptr ^= PLH_HASH_STEP(ptr, 16);
	ptr ^= PLH_HASH_STEP(ptr, 8);
	ptr ^= PLH_HASH_STEP(ptr, 4);
	ptr ^= PLH_HASH_STEP(ptr, 2);
	ptr ^= PLH_HASH_STEP(ptr, 1);
#undef PLH_HASH_STEP
	return (int16_t)(ptr & ((1ul << STACKSHOT_PLH_SHIFT) - 1));
}
1562
1563 enum stackshot_plh_lookup_type {
1564 STACKSHOT_PLH_LOOKUP_UNKNOWN,
1565 STACKSHOT_PLH_LOOKUP_SEND,
1566 STACKSHOT_PLH_LOOKUP_RECEIVE,
1567 };
1568
1569 static void
stackshot_plh_resetgen(void)1570 stackshot_plh_resetgen(void)
1571 {
1572 struct port_label_hash *plh = &port_label_hash;
1573 if (plh->plh_curgen_min == STACKSHOT_PLH_SIZE_MAX && plh->plh_curgen_max == 0) {
1574 return; // no lookups, nothing using the current generation
1575 }
1576 plh->plh_curgen++;
1577 plh->plh_curgen_min = STACKSHOT_PLH_SIZE_MAX;
1578 plh->plh_curgen_max = 0;
1579 if (plh->plh_curgen == 0) { // wrapped, zero the array and increment the generation
1580 for (int x = 0; x < plh->plh_size; x++) {
1581 plh->plh_gen[x] = 0;
1582 }
1583 plh->plh_curgen = 1;
1584 }
1585 }
1586
/*
 * Look up (or insert) a service-port-label pointer in the hash and return its
 * externally visible portlabel id (array index + 1).  Returns
 * STACKSHOT_PORTLABELID_NONE for NULL labels, and
 * STACKSHOT_PORTLABELID_MISSING when tracking is disabled, the table is full,
 * or the table appears corrupted.
 */
static int16_t
stackshot_plh_lookup(struct ipc_service_port_label *ispl, enum stackshot_plh_lookup_type type)
{
	struct port_label_hash *plh = &port_label_hash;
	int depth;
	int16_t cur;
	if (ispl == NULL) {
		return STACKSHOT_PORTLABELID_NONE;
	}
	/* Statistics only (DEVELOPMENT || DEBUG builds): classify the lookup direction. */
	switch (type) {
	case STACKSHOT_PLH_LOOKUP_SEND:
		PLH_STAT_OP(plh->plh_lookup_send++);
		break;
	case STACKSHOT_PLH_LOOKUP_RECEIVE:
		PLH_STAT_OP(plh->plh_lookup_receive++);
		break;
	default:
		break;
	}
	PLH_STAT_OP(plh->plh_lookups++);
	if (plh->plh_size == 0) {
		return STACKSHOT_PORTLABELID_MISSING;
	}
	int16_t hash = stackshot_plh_hash(ispl);
	assert(hash >= 0 && hash < (1ul << STACKSHOT_PLH_SHIFT));
	depth = 0;
	/* Walk this bucket's separate chain looking for the pointer. */
	for (cur = plh->plh_hash[hash]; cur >= 0; cur = plh->plh_chains[cur]) {
		/* cur must be in-range, and chain depth can never be above our # allocated */
		if (cur >= plh->plh_count || depth > plh->plh_count || depth > plh->plh_size) {
			PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
			return STACKSHOT_PORTLABELID_MISSING;
		}
		assert(cur < plh->plh_count);
		if (plh->plh_array[cur] == ispl) {
			PLH_STAT_OP((plh->plh_found++), (plh->plh_found_depth += depth));
			goto found;
		}
		depth++;
	}
	/* not found in hash table, so alloc and insert it */
	if (cur != -1) {
		PLH_STAT_OP((plh->plh_bad++), (plh->plh_bad_depth += depth));
		return STACKSHOT_PORTLABELID_MISSING; /* bad end of chain */
	}
	PLH_STAT_OP((plh->plh_insert++), (plh->plh_insert_depth += depth));
	if (plh->plh_count >= plh->plh_size) {
		return STACKSHOT_PORTLABELID_MISSING; /* no space */
	}
	/* Bump-allocate the next slot and push it on the front of the bucket's chain. */
	cur = plh->plh_count;
	plh->plh_count++;
	plh->plh_array[cur] = ispl;
	plh->plh_chains[cur] = plh->plh_hash[hash];
	plh->plh_hash[hash] = cur;
found:
	/* Stamp the entry with the current generation and widen this gen's dirty range. */
	plh->plh_gen[cur] = plh->plh_curgen;
	if (plh->plh_curgen_min > cur) {
		plh->plh_curgen_min = cur;
	}
	if (plh->plh_curgen_max < cur) {
		plh->plh_curgen_max = cur;
	}
	return cur + 1; /* offset to avoid 0 */
}
1650
1651 // record any PLH referenced since the last stackshot_plh_resetgen() call
static kern_return_t
kdp_stackshot_plh_record(void)
{
	kern_return_t error = KERN_SUCCESS;
	struct port_label_hash *plh = &port_label_hash;
	uint16_t count = plh->plh_count;
	uint8_t curgen = plh->plh_curgen;
	int16_t curgen_min = plh->plh_curgen_min;
	int16_t curgen_max = plh->plh_curgen_max;
	/* Sanity-check all bounds before touching the arrays; skip if nothing was referenced. */
	if (curgen_min <= curgen_max && curgen_max < count &&
	    count <= plh->plh_size && plh->plh_size <= STACKSHOT_PLH_SIZE_MAX) {
		struct ipc_service_port_label **arr = plh->plh_array;
		size_t ispl_size, max_namelen;
		kdp_ipc_splabel_size(&ispl_size, &max_namelen);
		for (int idx = curgen_min; idx <= curgen_max; idx++) {
			struct ipc_service_port_label *ispl = arr[idx];
			struct portlabel_info spl = {
				.portlabel_id = (idx + 1),
			};
			const char *name = NULL;
			long name_sz = 0;
			/* Only emit entries stamped during the current generation. */
			if (plh->plh_gen[idx] != curgen) {
				continue;
			}
			/* Only dereference the label if its memory is readable without faulting. */
			if (_stackshot_validate_kva((vm_offset_t)ispl, ispl_size)) {
				kdp_ipc_fill_splabel(ispl, &spl, &name);
			}
			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
			if (name != NULL && (name_sz = _stackshot_strlen(name, max_namelen)) > 0) { /* validates the kva */
				kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL_NAME, name_sz + 1, name));
			} else {
				spl.portlabel_flags |= STACKSHOT_PORTLABEL_READFAILED;
			}
			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_PORTLABEL, sizeof(spl), &spl));
			kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
			    STACKSHOT_KCCONTAINER_PORTLABEL, idx + 1));
		}
	}

error_exit:
	return error;
}
1695
#if DEVELOPMENT || DEBUG
/* Emit the port-label-hash counters into the stackshot as named uint32 values. */
static kern_return_t
kdp_stackshot_plh_stats(void)
{
	kern_return_t error = KERN_SUCCESS;
	struct port_label_hash *plh = &port_label_hash;

	/* Only record non-zero counters, to keep the output compact. */
#define PLH_STAT(x) do { if (plh->x != 0) { \
	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, plh->x, "stackshot_" #x)); \
} } while (0)
	PLH_STAT(plh_size);
	PLH_STAT(plh_lookups);
	PLH_STAT(plh_found);
	PLH_STAT(plh_found_depth);
	PLH_STAT(plh_insert);
	PLH_STAT(plh_insert_depth);
	PLH_STAT(plh_bad);
	PLH_STAT(plh_bad_depth);
	PLH_STAT(plh_lookup_send);
	PLH_STAT(plh_lookup_receive);
#undef PLH_STAT

error_exit:
	return error;
}
#endif /* DEVELOPMENT || DEBUG */
1722
/*
 * Compute the ts_ss_flags snapshot bits for a task: address-space width,
 * terminated/suspended/frozen state, task-policy bits (BG, foreground,
 * boosted, suppressed, TAL), memorystatus dirty-tracking state, the
 * workqueue state, and importance-donor flags.
 *
 * Runs in stackshot/debugger context: task fields are read directly,
 * without taking references or locks.
 */
static uint64_t
kcdata_get_task_ss_flags(task_t task)
{
	uint64_t ss_flags = 0;
	boolean_t task_64bit_addr = task_has_64Bit_addr(task);
	void *bsd_info = get_bsdtask_info(task);

	if (task_64bit_addr) {
		ss_flags |= kUser64_p;
	}
	/* Terminated == inactive, a corpse, or a proc that is exiting. */
	if (!task->active || task_is_a_corpse(task) || proc_exiting(bsd_info)) {
		ss_flags |= kTerminatedSnapshot;
	}
	if (task->pidsuspended) {
		ss_flags |= kPidSuspended;
	}
	if (task->frozen) {
		ss_flags |= kFrozen;
	}
	if (task->effective_policy.tep_darwinbg == 1) {
		ss_flags |= kTaskDarwinBG;
	}
	if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
		ss_flags |= kTaskIsForeground;
	}
	if (task->requested_policy.trp_boosted == 1) {
		ss_flags |= kTaskIsBoosted;
	}
	if (task->effective_policy.tep_sup_active == 1) {
		ss_flags |= kTaskIsSuppressed;
	}
#if CONFIG_MEMORYSTATUS

	boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
	memorystatus_proc_flags_unsafe(bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
	if (dirty) {
		ss_flags |= kTaskIsDirty;
	}
	if (dirty_tracked) {
		ss_flags |= kTaskIsDirtyTracked;
	}
	if (allow_idle_exit) {
		ss_flags |= kTaskAllowIdleExit;
	}

#endif
	if (task->effective_policy.tep_tal_engaged) {
		ss_flags |= kTaskTALEngaged;
	}

	/* Pack the low 3 bits of the process's workqueue state into bits 17-19. */
	ss_flags |= (0x7 & workqueue_get_pwq_state_kdp(bsd_info)) << 17;

#if IMPORTANCE_INHERITANCE
	if (task->task_imp_base) {
		if (task->task_imp_base->iit_donor) {
			ss_flags |= kTaskIsImpDonor;
		}
		if (task->task_imp_base->iit_live_donor) {
			ss_flags |= kTaskIsLiveImpDonor;
		}
	}
#endif
	return ss_flags;
}
1787
/*
 * Record shared-cache (dyld shared region) information for a task.
 *
 * Always sets exactly one of kTaskSharedRegion{None,System,Other} in
 * *task_snap_ss_flags.  For a task attached to a non-primary shared
 * region whose UUID has been copied in, additionally pushes the shared
 * cache ID and a legacy dyld_shared_cache_loadinfo entry into the kcdata.
 */
static kern_return_t
kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
{
	kern_return_t error = KERN_SUCCESS;

	uint64_t shared_cache_slide = 0;
	uint64_t shared_cache_first_mapping = 0;
	/*
	 * NOTE(review): kdp_fault_results is never written anywhere in this
	 * function as it stands, so the flag propagation at error_exit below
	 * is currently a no-op — presumably retained for parity with
	 * kcdata_record_uuid_info(); confirm before relying on those flags.
	 */
	uint32_t kdp_fault_results = 0;
	uint32_t shared_cache_id = 0;
	struct dyld_shared_cache_loadinfo shared_cache_data = {0};


	assert(task_snap_ss_flags != NULL);

	/* Get basic info about the shared region pointer, regardless of any failures */
	if (task->shared_region == NULL) {
		*task_snap_ss_flags |= kTaskSharedRegionNone;
	} else if (task->shared_region == primary_system_shared_region) {
		*task_snap_ss_flags |= kTaskSharedRegionSystem;
	} else {
		*task_snap_ss_flags |= kTaskSharedRegionOther;
	}

	/* Validate the region pointer before dereferencing it in debugger context. */
	if (task->shared_region && _stackshot_validate_kva((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
		struct vm_shared_region *sr = task->shared_region;
		shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;

		shared_cache_id = sr->sr_id;
	} else {
		*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
		goto error_exit;
	}

	/* We haven't copied in the shared region UUID yet as part of setup */
	if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
		goto error_exit;
	}


	/*
	 * No refcounting here, but we are in debugger context, so that should be safe.
	 */
	shared_cache_slide = task->shared_region->sr_slide;

	if (task->shared_region == primary_system_shared_region) {
		/* skip adding shared cache info -- it's the same as the system level one */
		goto error_exit;
	}
	/*
	 * New-style shared cache reference: for non-primary shared regions,
	 * just include the ID of the shared cache we're attached to. Consumers
	 * should use the following info from the task's ts_ss_flags as well:
	 *
	 * kTaskSharedRegionNone - task is not attached to a shared region
	 * kTaskSharedRegionSystem - task is attached to the shared region
	 *     with kSharedCacheSystemPrimary set in sharedCacheFlags.
	 * kTaskSharedRegionOther - task is attached to the shared region with
	 *     sharedCacheID matching the STACKSHOT_KCTYPE_SHAREDCACHE_ID entry.
	 */
	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_ID, sizeof(shared_cache_id), &shared_cache_id));

	/*
	 * For backwards compatibility; this should eventually be removed.
	 *
	 * Historically, this data was in a dyld_uuid_info_64 structure, but the
	 * naming of both the structure and fields for this use wasn't great. The
	 * dyld_shared_cache_loadinfo structure has better names, but the same
	 * layout and content as the original.
	 *
	 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
	 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
	 * entries; here, it's the slid first mapping, and we leave it that way
	 * for backwards compatibility.
	 */
	shared_cache_data.sharedCacheSlide = shared_cache_slide;
	kdp_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
	shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
	shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
	kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));

error_exit:
	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
	}

	return error;
}
1883
/*
 * Record image UUID/load-address ("loadinfo") data for a task.
 *
 * For user tasks, reads dyld's all_image_infos out of task memory to find
 * the UUID array (and, when the dyld version supports it, the compact
 * image info blob), then copies them into the kcdata.  If the UUID array
 * cannot be copied in, falls back to the main binary's UUID from the proc.
 * For the kernel task (pid 0), records the kernelcache or kernel UUID and
 * optionally the loaded-kext summaries.
 *
 * Copyin faults are translated into kTaskUUIDInfo* /
 * kTaskDyldCompactInfo* bits in *task_snap_ss_flags at exit.
 */
static kern_return_t
kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
{
	bool save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
	bool save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
	bool save_compactinfo_p = ((trace_flags & STACKSHOT_SAVE_DYLD_COMPACTINFO) != 0);
	bool should_fault = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);

	kern_return_t error = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;

	mach_vm_address_t dyld_compactinfo_addr = 0;
	uint32_t dyld_compactinfo_size = 0;

	uint32_t uuid_info_count = 0;
	mach_vm_address_t uuid_info_addr = 0;
	/* NOTE(review): uuid_info_timestamp is captured but not used below. */
	uint64_t uuid_info_timestamp = 0;
	kdp_fault_result_flags_t kdp_fault_results = 0;


	assert(task_snap_ss_flags != NULL);

	int task_pid = pid_from_task(task);
	boolean_t task_64bit_addr = task_has_64Bit_addr(task);

	if ((save_loadinfo_p || save_compactinfo_p) && have_pmap && task->active && task_pid > 0) {
		/* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
		if (task_64bit_addr) {
			struct user64_dyld_all_image_infos task_image_infos;
			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
			    sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
				uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
				uuid_info_addr = task_image_infos.uuidArray;
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
					uuid_info_timestamp = task_image_infos.timestamp;
				}
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
				}

			}
		} else {
			/* 32-bit task: same extraction via the user32 layout. */
			struct user32_dyld_all_image_infos task_image_infos;
			if (stackshot_copyin(task->map, task->all_image_info_addr, &task_image_infos,
			    sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
				uuid_info_count = task_image_infos.uuidArrayCount;
				uuid_info_addr = task_image_infos.uuidArray;
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
					uuid_info_timestamp = task_image_infos.timestamp;
				}
				if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_COMPACTINFO_MINIMUM_VERSION) {
					dyld_compactinfo_addr = task_image_infos.compact_dyld_image_info_addr;
					dyld_compactinfo_size = task_image_infos.compact_dyld_image_info_size;
				}
			}
		}

		/*
		 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
		 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
		 * for this task.
		 */
		if (!uuid_info_addr) {
			uuid_info_count = 0;
		}

		if (!dyld_compactinfo_addr) {
			dyld_compactinfo_size = 0;
		}

	}

	/* Kernel task: the count covers the kernel itself plus any kext summaries. */
	if (have_pmap && task_pid == 0) {
		if (save_kextloadinfo_p && _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
			uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
		} else {
			uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
		}
	}

	if (save_compactinfo_p && task_pid > 0) {
		if (dyld_compactinfo_size == 0) {
			*task_snap_ss_flags |= kTaskDyldCompactInfoNone;
		} else if (dyld_compactinfo_size > MAX_DYLD_COMPACTINFO) {
			*task_snap_ss_flags |= kTaskDyldCompactInfoTooBig;
		} else {
			/* Compactinfo faults are tracked separately from loadinfo faults. */
			kdp_fault_result_flags_t ci_kdp_fault_results = 0;

			/* Open a compression window to avoid overflowing the stack */
			kcdata_compression_window_open(kcd);
			kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_DYLD_COMPACTINFO,
			    dyld_compactinfo_size, &out_addr));

			if (!stackshot_copyin(task->map, dyld_compactinfo_addr, (void *)out_addr,
			    dyld_compactinfo_size, should_fault, &ci_kdp_fault_results)) {
				bzero((void *)out_addr, dyld_compactinfo_size);
			}
			if (ci_kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoMissing;
			}

			if (ci_kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoTriedFault;
			}

			if (ci_kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
				*task_snap_ss_flags |= kTaskDyldCompactInfoFaultedIn;
			}

			kcd_exit_on_error(kcdata_compression_window_close(kcd));
		}
	}
	if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
		uint32_t copied_uuid_count = 0;
		uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
		uint32_t uuid_info_array_size = 0;

		/* Open a compression window to avoid overflowing the stack */
		kcdata_compression_window_open(kcd);

		/* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
		if (uuid_info_count > 0) {
			uuid_info_array_size = uuid_info_count * uuid_info_size;

			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
			    uuid_info_size, uuid_info_count, &out_addr));

			if (!stackshot_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
				bzero((void *)out_addr, uuid_info_array_size);
			} else {
				copied_uuid_count = uuid_info_count;
			}
		}

		uuid_t binary_uuid;
		if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
			/* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
			if (uuid_info_array_size == 0) {
				/* We just need to store one UUID */
				uuid_info_array_size = uuid_info_size;
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
				    uuid_info_size, 1, &out_addr));
			}

			if (task_64bit_addr) {
				struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
				uint64_t image_load_address = task->mach_header_vm_address;

				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
			} else {
				struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
				uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;

				kdp_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
				kdp_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
			}
		}

		kcd_exit_on_error(kcdata_compression_window_close(kcd));
	} else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
		/* Kernel task: record kernel(cache) and kext load info. */
		uintptr_t image_load_address;

		do {
#if defined(__arm64__)
			if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
				struct dyld_uuid_info_64 kc_uuid = {0};
				kc_uuid.imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
				kdp_memcpy(&kc_uuid.imageUUID, &kernelcache_uuid, sizeof(uuid_t));
				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &kc_uuid));
				break;
			}
#endif /* defined(__arm64__) */

			if (!kernel_uuid || !_stackshot_validate_kva((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
				/* Kernel UUID not found or inaccessible */
				break;
			}

			uint32_t uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO;
			if ((sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info))) {
				uuid_type = KCDATA_TYPE_LIBRARY_LOADINFO64;
#if defined(__arm64__)
				kc_format_t primary_kc_type = KCFormatUnknown;
				if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type == KCFormatFileset)) {
					/* return TEXT_EXEC based load information on arm devices running with fileset kernelcaches */
					uuid_type = STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC;
				}
#endif
			}

			/*
			 * The element count of the array can vary - avoid overflowing the
			 * stack by opening a window.
			 */
			kcdata_compression_window_open(kcd);
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, uuid_type,
			    sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
			kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;

			image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
#if defined(__arm64__)
			if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
				/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
				extern vm_offset_t segTEXTEXECB;
				image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(segTEXTEXECB);
			}
#endif
			uuid_info_array[0].imageLoadAddress = image_load_address;
			kdp_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));

			/* Append one entry per loaded kext, after validating the summary array. */
			if (save_kextloadinfo_p &&
			    _stackshot_validate_kva((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
			    _stackshot_validate_kva((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
			    gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
				uint32_t kexti;
				for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
					image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
#if defined(__arm64__)
					if (uuid_type == STACKSHOT_KCTYPE_LOADINFO64_TEXT_EXEC) {
						/* If we're reporting TEXT_EXEC load info, populate the TEXT_EXEC base instead */
						image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].text_exec_address);
					}
#endif
					uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
					kdp_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
				}
			}
			kcd_exit_on_error(kcdata_compression_window_close(kcd));
		} while (0);
	}

error_exit:
	if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
		*task_snap_ss_flags |= kTaskUUIDInfoMissing;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
		*task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
	}

	if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
		*task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
	}

	return error;
}
2132
2133 static kern_return_t
kcdata_record_task_iostats(kcdata_descriptor_t kcd,task_t task)2134 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
2135 {
2136 kern_return_t error = KERN_SUCCESS;
2137 mach_vm_address_t out_addr = 0;
2138
2139 /* I/O Statistics if any counters are non zero */
2140 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
2141 if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
2142 /* struct io_stats_snapshot is quite large - avoid overflowing the stack. */
2143 kcdata_compression_window_open(kcd);
2144 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
2145 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
2146 _iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
2147 _iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
2148 _iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
2149 _iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
2150 _iostat->ss_paging_count = task->task_io_stats->paging.count;
2151 _iostat->ss_paging_size = task->task_io_stats->paging.size;
2152 _iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
2153 _iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
2154 _iostat->ss_metadata_count = task->task_io_stats->metadata.count;
2155 _iostat->ss_metadata_size = task->task_io_stats->metadata.size;
2156 _iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
2157 _iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
2158 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
2159 _iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
2160 _iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
2161 }
2162 kcd_exit_on_error(kcdata_compression_window_close(kcd));
2163 }
2164
2165
2166 error_exit:
2167 return error;
2168 }
2169
#if CONFIG_PERVASIVE_CPI
/*
 * Push an instructions/cycles snapshot for the task via the recount
 * subsystem: totals, plus the subset reported through the perf_only
 * out-parameter (per the recount_task_terminated_usage_perf_only API,
 * this appears to be the performance-core share — confirm against
 * recount.h).
 */
static kern_return_t
kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
{
	struct instrs_cycles_snapshot_v2 instrs_cycles = { 0 };
	struct recount_usage usage = { 0 };
	struct recount_usage perf_only = { 0 };
	recount_task_terminated_usage_perf_only(task, &usage, &perf_only);
	instrs_cycles.ics_instructions = recount_usage_instructions(&usage);
	instrs_cycles.ics_cycles = recount_usage_cycles(&usage);
	instrs_cycles.ics_p_instructions = recount_usage_instructions(&perf_only);
	instrs_cycles.ics_p_cycles = recount_usage_cycles(&perf_only);

	return kcdata_push_data(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(instrs_cycles), &instrs_cycles);
}
#endif /* CONFIG_PERVASIVE_CPI */
2186
2187 static kern_return_t
kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd,task_t task)2188 kcdata_record_task_cpu_architecture(kcdata_descriptor_t kcd, task_t task)
2189 {
2190 struct stackshot_cpu_architecture cpu_architecture = {0};
2191 int32_t cputype;
2192 int32_t cpusubtype;
2193
2194 proc_archinfo_kdp(get_bsdtask_info(task), &cputype, &cpusubtype);
2195 cpu_architecture.cputype = cputype;
2196 cpu_architecture.cpusubtype = cpusubtype;
2197
2198 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_TASK_CPU_ARCHITECTURE, sizeof(struct stackshot_cpu_architecture), &cpu_architecture);
2199 }
2200
2201 static kern_return_t
kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd,task_t task)2202 kcdata_record_task_codesigning_info(kcdata_descriptor_t kcd, task_t task)
2203 {
2204 struct stackshot_task_codesigning_info codesigning_info = {};
2205 void * bsdtask_info = NULL;
2206 uint32_t trust = 0;
2207 kern_return_t ret = 0;
2208 pmap_t pmap = get_task_pmap(task);
2209 if (task != kernel_task) {
2210 bsdtask_info = get_bsdtask_info(task);
2211 codesigning_info.csflags = proc_getcsflags_kdp(bsdtask_info);
2212 ret = get_trust_level_kdp(pmap, &trust);
2213 if (ret != KERN_SUCCESS) {
2214 trust = KCDATA_INVALID_CS_TRUST_LEVEL;
2215 }
2216 codesigning_info.cs_trust_level = trust;
2217 } else {
2218 return KERN_SUCCESS;
2219 }
2220 return kcdata_push_data(kcd, STACKSHOT_KCTYPE_CODESIGNING_INFO, sizeof(struct stackshot_task_codesigning_info), &codesigning_info);
2221 }
#if CONFIG_TASK_SUSPEND_STATS
/*
 * Record a task's suspension statistics and the most recent suspension
 * sources (who suspended it, from where, and when) into the stackshot.
 *
 * Returns KERN_SUCCESS for the kernel task (nothing to record), or the
 * first error from the task_get_*_kdp / kcdata_push_* calls.
 */
static kern_return_t
kcdata_record_task_suspension_info(kcdata_descriptor_t kcd, task_t task)
{
	kern_return_t ret = KERN_SUCCESS;
	struct stackshot_suspension_info suspension_info = {};
	task_suspend_stats_data_t suspend_stats;
	task_suspend_source_array_t suspend_sources;
	struct stackshot_suspension_source suspension_sources[TASK_SUSPEND_SOURCES_MAX];
	int i;

	if (task == kernel_task) {
		return KERN_SUCCESS;
	}

	ret = task_get_suspend_stats_kdp(task, &suspend_stats);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	suspension_info.tss_count = suspend_stats.tss_count;
	suspension_info.tss_duration = suspend_stats.tss_duration;
	suspension_info.tss_last_end = suspend_stats.tss_last_end;
	suspension_info.tss_last_start = suspend_stats.tss_last_start;
	ret = kcdata_push_data(kcd, STACKSHOT_KCTYPE_SUSPENSION_INFO, sizeof(suspension_info), &suspension_info);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	ret = task_get_suspend_sources_kdp(task, suspend_sources);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	for (i = 0; i < TASK_SUSPEND_SOURCES_MAX; ++i) {
		suspension_sources[i].tss_pid = suspend_sources[i].tss_pid;
		/*
		 * Fix: bound the copy by the *destination* buffer, as strlcpy
		 * requires; the original used the source field's size, which
		 * under-protects if the destination is ever smaller.
		 */
		strlcpy(suspension_sources[i].tss_procname, suspend_sources[i].tss_procname,
		    sizeof(suspension_sources[i].tss_procname));
		suspension_sources[i].tss_tid = suspend_sources[i].tss_tid;
		suspension_sources[i].tss_time = suspend_sources[i].tss_time;
	}
	return kcdata_push_array(kcd, STACKSHOT_KCTYPE_SUSPENSION_SOURCE, sizeof(suspension_sources[0]), TASK_SUSPEND_SOURCES_MAX, &suspension_sources);
}
#endif /* CONFIG_TASK_SUSPEND_STATS */
2265
/*
 * Record a minimal snapshot for a task that is mid-transition (exec or
 * termination), identified by transition_type, as a
 * transitioning_task_snapshot entry.  task_snap_ss_flags are OR-ed on
 * top of the flags computed by kcdata_get_task_ss_flags().
 */
static kern_return_t
kcdata_record_transitioning_task_snapshot(kcdata_descriptor_t kcd, task_t task, unaligned_u64 task_snap_ss_flags, uint64_t transition_type)
{
	kern_return_t error = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;
	struct transitioning_task_snapshot * cur_tsnap = NULL;

	int task_pid = pid_from_task(task);
	/* Is returning -1 ok for terminating task ok ??? */
	uint64_t task_uniqueid = get_task_uniqueid(task);

	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
		/*
		 * if this task is a transit task from another one, show the pid as
		 * negative
		 */
		task_pid = 0 - task_pid;
	}

	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
	kcdata_compression_window_open(kcd);
	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TRANSITIONING_TASK_SNAPSHOT, sizeof(struct transitioning_task_snapshot), &out_addr));
	cur_tsnap = (struct transitioning_task_snapshot *)out_addr;
	bzero(cur_tsnap, sizeof(*cur_tsnap));

	cur_tsnap->tts_unique_pid = task_uniqueid;
	cur_tsnap->tts_ss_flags = kcdata_get_task_ss_flags(task);
	cur_tsnap->tts_ss_flags |= task_snap_ss_flags;
	cur_tsnap->tts_transition_type = transition_type;
	cur_tsnap->tts_pid = task_pid;

	/* Add the BSD process identifiers */
	if (task_pid != -1 && get_bsdtask_info(task) != NULL) {
		proc_name_kdp(get_bsdtask_info(task), cur_tsnap->tts_p_comm, sizeof(cur_tsnap->tts_p_comm));
	} else {
		cur_tsnap->tts_p_comm[0] = '\0';
	}

	kcd_exit_on_error(kcdata_compression_window_close(kcd));

error_exit:
	return error;
}
2309
/*
 * Record a full task_snapshot_v2 for a task, followed by optional
 * per-task extras gated on trace_flags and kernel configuration:
 * jetsam coalition ID, ASID, page tables, I/O stats, instrs/cycles,
 * CPU architecture, code-signing info, and suspension stats.
 *
 * task_snap_ss_flags are OR-ed into the snapshot's ts_ss_flags on top of
 * the flags computed by kcdata_get_task_ss_flags().  When
 * STACKSHOT_COLLECTS_LATENCY_INFO is set, per-phase timings are written
 * into *latency_info.
 */
static kern_return_t
#if STACKSHOT_COLLECTS_LATENCY_INFO
kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags, struct stackshot_latency_task *latency_info)
#else
kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
{
	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	/* iostats are skipped for delta stackshots and when explicitly disabled */
	bool collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
#if CONFIG_PERVASIVE_CPI
	bool collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
#endif /* CONFIG_PERVASIVE_CPI */
#if __arm64__
	bool collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
#endif
	bool collect_pagetables = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);


	kern_return_t error = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;
	struct task_snapshot_v2 * cur_tsnap = NULL;
#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info->cur_tsnap_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	int task_pid = pid_from_task(task);
	uint64_t task_uniqueid = get_task_uniqueid(task);
	void *bsd_info = get_bsdtask_info(task);
	uint64_t proc_starttime_secs = 0;

	if (task_pid && (task_did_exec_internal(task) || task_is_exec_copy_internal(task))) {
		/*
		 * if this task is a transit task from another one, show the pid as
		 * negative
		 */
		task_pid = 0 - task_pid;
	}

	/* the task_snapshot_v2 struct is large - avoid overflowing the stack */
	kcdata_compression_window_open(kcd);
	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
	cur_tsnap = (struct task_snapshot_v2 *)out_addr;
	bzero(cur_tsnap, sizeof(*cur_tsnap));

	cur_tsnap->ts_unique_pid = task_uniqueid;
	cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
	cur_tsnap->ts_ss_flags |= task_snap_ss_flags;

	/* CPU time already charged to threads of this task that have exited. */
	struct recount_usage term_usage = { 0 };
	recount_task_terminated_usage(task, &term_usage);
	struct recount_times_mach term_times = recount_usage_times_mach(&term_usage);
	cur_tsnap->ts_user_time_in_terminated_threads = term_times.rtm_user;
	cur_tsnap->ts_system_time_in_terminated_threads = term_times.rtm_system;

	proc_starttime_kdp(bsd_info, &proc_starttime_secs, NULL, NULL);
	cur_tsnap->ts_p_start_sec = proc_starttime_secs;
	/* phys footprint is only meaningful when a usable pmap is present */
	cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
	cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
	cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
	cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);

	cur_tsnap->ts_suspend_count = task->suspend_count;
	cur_tsnap->ts_faults = counter_load(&task->faults);
	cur_tsnap->ts_pageins = counter_load(&task->pageins);
	cur_tsnap->ts_cow_faults = counter_load(&task->cow_faults);
	cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
	    LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
	cur_tsnap->ts_pid = task_pid;

	/* Add the BSD process identifiers */
	if (task_pid != -1 && bsd_info != NULL) {
		proc_name_kdp(bsd_info, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
	} else {
		cur_tsnap->ts_p_comm[0] = '\0';
#if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
		/* No proc: fall back to the importance-inheritance procname. */
		if (task->task_imp_base != NULL) {
			kdp_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
			    MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
		}
#endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
	}

	kcd_exit_on_error(kcdata_compression_window_close(kcd));

#if CONFIG_COALITIONS
	if (task_pid != -1 && bsd_info != NULL &&
	    (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
		/*
		 * The jetsam coalition ID is always saved, even if
		 * STACKSHOT_SAVE_JETSAM_COALITIONS is not set.
		 */
		uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &jetsam_coal_id));
	}
#endif /* CONFIG_COALITIONS */

#if __arm64__
	if (collect_asid && have_pmap) {
		uint32_t asid = PMAP_VASID(task->map->pmap);
		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_ASID, sizeof(asid), &asid));
	}
#endif

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info->cur_tsnap_latency = mach_absolute_time() - latency_info->cur_tsnap_latency;
	latency_info->pmap_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	if (collect_pagetables && have_pmap) {
#if SCHED_HYGIENE_DEBUG
		// pagetable dumps can be large; reset the interrupt timeout to avoid a panic
		ml_spin_debug_clear_self();
#endif
		size_t bytes_dumped = 0;
		error = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd), stack_snapshot_pagetable_mask, &bytes_dumped);
		if (error != KERN_SUCCESS) {
			goto error_exit;
		} else {
			/* Variable size array - better not have it on the stack. */
			kcdata_compression_window_open(kcd);
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
			    sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
			kcd_exit_on_error(kcdata_compression_window_close(kcd));
		}
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info->pmap_latency = mach_absolute_time() - latency_info->pmap_latency;
	latency_info->bsd_proc_ids_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/*
	 * NOTE(review): the bsd_proc_ids latency interval below measures an
	 * empty span — the code it once timed appears to have been removed.
	 */
#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info->bsd_proc_ids_latency = mach_absolute_time() - latency_info->bsd_proc_ids_latency;
	latency_info->end_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	if (collect_iostats) {
		kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
	}

#if CONFIG_PERVASIVE_CPI
	if (collect_instrs_cycles) {
		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
	}
#endif /* CONFIG_PERVASIVE_CPI */

	kcd_exit_on_error(kcdata_record_task_cpu_architecture(kcd, task));
	kcd_exit_on_error(kcdata_record_task_codesigning_info(kcd, task));

#if CONFIG_TASK_SUSPEND_STATS
	kcd_exit_on_error(kcdata_record_task_suspension_info(kcd, task));
#endif /* CONFIG_TASK_SUSPEND_STATS */

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info->end_latency = mach_absolute_time() - latency_info->end_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

error_exit:
	return error;
}
2470
/*
 * Record a delta task snapshot (task_delta_snapshot_v2) into the kcdata
 * buffer.  Used when STACKSHOT_COLLECT_DELTA_SNAPSHOT is set and the task
 * has not changed enough to warrant a full snapshot.
 *
 * Params:
 *   kcd                - destination kcdata descriptor
 *   task               - task being recorded (fields read directly; caller
 *                        guarantees it is safe to dereference)
 *   trace_flags        - STACKSHOT_* flags; only STACKSHOT_ASID and
 *                        STACKSHOT_INSTRS_CYCLES are consulted here
 *   have_pmap          - true if task->map->pmap was validated by the caller
 *   task_snap_ss_flags - extra ss_flags to OR into the snapshot
 *
 * Returns KERN_SUCCESS, or the first error from a kcdata allocation
 * (via the kcd_exit_on_error goto-cleanup macro).
 */
static kern_return_t
kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint64_t trace_flags, boolean_t have_pmap, unaligned_u64 task_snap_ss_flags)
{
#if !CONFIG_PERVASIVE_CPI
#pragma unused(trace_flags)
#endif /* !CONFIG_PERVASIVE_CPI */
	kern_return_t error = KERN_SUCCESS;
	struct task_delta_snapshot_v2 * cur_tsnap = NULL;
	mach_vm_address_t out_addr = 0;
	(void) trace_flags;
#if __arm64__
	boolean_t collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
#endif
#if CONFIG_PERVASIVE_CPI
	boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
#endif /* CONFIG_PERVASIVE_CPI */

	uint64_t task_uniqueid = get_task_uniqueid(task);

	/* Reserve space in the kcdata buffer, then fill the snapshot in place. */
	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));

	cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;

	cur_tsnap->tds_unique_pid = task_uniqueid;
	cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
	cur_tsnap->tds_ss_flags |= task_snap_ss_flags;

	/* CPU time accumulated by threads that have already exited. */
	struct recount_usage usage = { 0 };
	recount_task_terminated_usage(task, &usage);
	struct recount_times_mach term_times = recount_usage_times_mach(&usage);

	cur_tsnap->tds_user_time_in_terminated_threads = term_times.rtm_user;
	cur_tsnap->tds_system_time_in_terminated_threads = term_times.rtm_system;

	/* Physical footprint is only meaningful when the pmap is valid. */
	cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;

	cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
	cur_tsnap->tds_suspend_count = task->suspend_count;
	cur_tsnap->tds_faults = counter_load(&task->faults);
	cur_tsnap->tds_pageins = counter_load(&task->pageins);
	cur_tsnap->tds_cow_faults = counter_load(&task->cow_faults);
	cur_tsnap->tds_was_throttled = (uint32_t)proc_was_throttled_from_task(task);
	cur_tsnap->tds_did_throttle = (uint32_t)proc_did_throttle_from_task(task);
	/* Encode latency QoS with a 0xFF marker in the upper bits unless unspecified. */
	cur_tsnap->tds_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
	    ? LATENCY_QOS_TIER_UNSPECIFIED
	    : ((0xFF << 16) | task->effective_policy.tep_latency_qos);

#if __arm64__
	if (collect_asid && have_pmap) {
		uint32_t asid = PMAP_VASID(task->map->pmap);
		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
		kdp_memcpy((void*)out_addr, &asid, sizeof(asid));
	}
#endif

#if CONFIG_PERVASIVE_CPI
	if (collect_instrs_cycles) {
		kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
	}
#endif /* CONFIG_PERVASIVE_CPI */

error_exit:
	return error;
}
2535
2536 static kern_return_t
kcdata_record_thread_iostats(kcdata_descriptor_t kcd,thread_t thread)2537 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
2538 {
2539 kern_return_t error = KERN_SUCCESS;
2540 mach_vm_address_t out_addr = 0;
2541
2542 /* I/O Statistics */
2543 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
2544 if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
2545 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
2546 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
2547 _iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
2548 _iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
2549 _iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
2550 _iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
2551 _iostat->ss_paging_count = thread->thread_io_stats->paging.count;
2552 _iostat->ss_paging_size = thread->thread_io_stats->paging.size;
2553 _iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
2554 _iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
2555 _iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
2556 _iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
2557 _iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
2558 _iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
2559 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
2560 _iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
2561 _iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
2562 }
2563 }
2564
2565 error_exit:
2566 return error;
2567 }
2568
2569 bool
machine_trace_thread_validate_kva(vm_offset_t addr)2570 machine_trace_thread_validate_kva(vm_offset_t addr)
2571 {
2572 return _stackshot_validate_kva(addr, sizeof(uintptr_t));
2573 }
2574
/*
 * Per-thread state threaded through _stackshot_backtrace_copy() while
 * walking a user stack.  Caches the last translated page so consecutive
 * reads within one page skip the physical translation.
 */
struct _stackshot_backtrace_context {
	vm_map_t sbc_map;            /* user map the stack lives in */
	vm_offset_t sbc_prev_page;   /* user VA of last translated page (page-aligned) */
	vm_offset_t sbc_prev_kva;    /* kernel VA mapping of that page */
	uint32_t sbc_flags;          /* accumulated kThread*BT fault flags */
	bool sbc_allow_faulting;     /* whether page faults may be taken to bring pages in */
};
2582
/*
 * Copy callback used by backtrace_user() to read user stack memory from
 * within stackshot context (no normal copyin available).  Translates the
 * user address to a physical page, maps it through the physmap, and
 * memcpy()s from there.  Consecutive reads from the same user page reuse
 * the cached translation in ctx.
 *
 * Returns 0 on success, EINVAL for a misaligned source, or EFAULT when the
 * page cannot be resolved (fault flags are still accumulated into
 * ctx->sbc_flags in that case).
 */
static errno_t
_stackshot_backtrace_copy(void *vctx, void *dst, user_addr_t src, size_t size)
{
	struct _stackshot_backtrace_context *ctx = vctx;
	size_t map_page_mask = 0;
	size_t __assert_only map_page_size = kdp_vm_map_get_page_size(ctx->sbc_map,
	    &map_page_mask);
	/* A sub-page, size-aligned read can never straddle a page boundary. */
	assert(size < map_page_size);
	if (src & (size - 1)) {
		// The source should be aligned to the size passed in, like a stack
		// frame or word.
		return EINVAL;
	}

	vm_offset_t src_page = src & ~map_page_mask;
	vm_offset_t src_kva = 0;

	if (src_page != ctx->sbc_prev_page) {
		/* New page: translate user VA -> physical, optionally faulting it in. */
		uint32_t res = 0;
		uint32_t flags = 0;
		vm_offset_t src_pa = stackshot_find_phys(ctx->sbc_map, src,
		    ctx->sbc_allow_faulting, &res);

		flags |= (res & KDP_FAULT_RESULT_PAGED_OUT) ? kThreadTruncatedBT : 0;
		flags |= (res & KDP_FAULT_RESULT_TRIED_FAULT) ? kThreadTriedFaultBT : 0;
		flags |= (res & KDP_FAULT_RESULT_FAULTED_IN) ? kThreadFaultedBT : 0;
		/* Record fault outcome even on failure so the snapshot can report it. */
		ctx->sbc_flags |= flags;
		if (src_pa == 0) {
			return EFAULT;
		}

		src_kva = phystokv(src_pa);
		/* Cache the translation; only updated on success so a failed page
		 * is retried on the next call. */
		ctx->sbc_prev_page = src_page;
		ctx->sbc_prev_kva = (src_kva & ~map_page_mask);
	} else {
		/* Same page as last time: reuse the cached kernel mapping. */
		src_kva = ctx->sbc_prev_kva + (src & map_page_mask);
	}

#if KASAN
	/*
	 * KASan does not monitor accesses to userspace pages. Therefore, it is
	 * pointless to maintain a shadow map for them. Instead, they are all
	 * mapped to a single, always valid shadow map page. This approach saves
	 * a considerable amount of shadow map pages which are limited and
	 * precious.
	 */
	kasan_notify_address_nopoison(src_kva, size);
#endif
	memcpy(dst, (const void *)src_kva, size);

	return 0;
}
2635
2636 #if CONFIG_EXCLAVES
2637
2638 /* Return index of last xnu frame before secure world. Valid frame index is
2639 * always in range <0, nframes-1>. When frame is not found, return nframes
2640 * value. */
2641 static uint32_t
kdp_exclave_stack_offset(uintptr_t * out_addr,size_t nframes)2642 kdp_exclave_stack_offset(uintptr_t * out_addr, size_t nframes)
2643 {
2644 size_t i = 0;
2645 while (i < nframes &&
2646 !((exclaves_enter_range_start < out_addr[i]) && (out_addr[i] <= exclaves_enter_range_end))
2647 && !((exclaves_upcall_range_start < out_addr[i]) && (out_addr[i] <= exclaves_upcall_range_end))
2648 ) {
2649 i++;
2650 }
2651
2652 return (uint32_t)i;
2653 }
2654 #endif /* CONFIG_EXCLAVES */
2655
/*
 * Record a full thread snapshot (thread_snapshot_v4) plus its optional
 * companions into the kcdata buffer: dispatch queue serial/label, thread
 * name, CPU times, user and kernel backtraces, async (swift continuation)
 * stack, thread group, iostats, and instruction/cycle counts.
 *
 * Runs in stackshot context: memory access goes through the validated
 * stackshot copy helpers, and all buffer space is obtained from kcd.
 * Errors from any kcdata allocation unwind through error_exit via the
 * kcd_exit_on_error macro.
 */
static kern_return_t
kcdata_record_thread_snapshot(
	kcdata_descriptor_t kcd, thread_t thread, task_t task, uint64_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
{
	boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
	boolean_t active_kthreads_only_p = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
	boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	boolean_t collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
#if CONFIG_PERVASIVE_CPI
	boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
#endif /* CONFIG_PERVASIVE_CPI */
	kern_return_t error = KERN_SUCCESS;

#if STACKSHOT_COLLECTS_LATENCY_INFO
	struct stackshot_latency_thread latency_info;
	latency_info.cur_thsnap1_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	mach_vm_address_t out_addr = 0;
	int saved_count = 0;

	struct thread_snapshot_v4 * cur_thread_snap = NULL;
	char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];

	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
	cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;

	/* Populate the thread snapshot header */
	cur_thread_snap->ths_ss_flags = 0;
	cur_thread_snap->ths_thread_id = thread_tid(thread);
	cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
	cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
	cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;

	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
		cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
	} else {
		cur_thread_snap->ths_voucher_identifier = 0;
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.cur_thsnap1_latency = mach_absolute_time() - latency_info.cur_thsnap1_latency;
	latency_info.dispatch_serial_latency = mach_absolute_time();
	latency_info.dispatch_label_latency = 0;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/*
	 * Dispatch queue info: copy the queue pointer, serial number, and label
	 * from user memory.  Only attempted for active user tasks with a valid
	 * pmap; every copyin failure simply skips the corresponding datum.
	 */
	cur_thread_snap->ths_dqserialnum = 0;
	if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
		uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
		if (dqkeyaddr != 0) {
			uint64_t dqaddr = 0;
			boolean_t copyin_ok = stackshot_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
			if (copyin_ok && dqaddr != 0) {
				uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
				uint64_t dqserialnum = 0;
				copyin_ok = stackshot_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
				if (copyin_ok) {
					cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
					cur_thread_snap->ths_dqserialnum = dqserialnum;
				}

#if STACKSHOT_COLLECTS_LATENCY_INFO
				latency_info.dispatch_serial_latency = mach_absolute_time() - latency_info.dispatch_serial_latency;
				latency_info.dispatch_label_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

				/* try copying in the queue label */
				uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
				if (label_offs) {
					uint64_t dqlabeladdr = dqaddr + label_offs;
					uint64_t actual_dqlabeladdr = 0;

					copyin_ok = stackshot_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
					if (copyin_ok && actual_dqlabeladdr != 0) {
						char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
						int len;

						bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
						len = stackshot_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
						if (len > 0) {
							mach_vm_address_t label_addr = 0;
							kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
							kdp_strlcpy((char*)label_addr, &label_buf[0], len);
						}
					}
				}
#if STACKSHOT_COLLECTS_LATENCY_INFO
				latency_info.dispatch_label_latency = mach_absolute_time() - latency_info.dispatch_label_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
			}
		}
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	if ((cur_thread_snap->ths_ss_flags & kHasDispatchSerial) == 0) {
		latency_info.dispatch_serial_latency = 0;
	}
	latency_info.cur_thsnap2_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	struct recount_times_mach times = recount_thread_times(thread);
	cur_thread_snap->ths_user_time = times.rtm_user;
	cur_thread_snap->ths_sys_time = times.rtm_system;

	/* Translate scheduler/policy state into snapshot ss_flags. */
	if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
		cur_thread_snap->ths_ss_flags |= kThreadMain;
	}
	if (thread->effective_policy.thep_darwinbg) {
		cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
	}
	if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
		cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
	}
	if (thread->suspend_count > 0) {
		cur_thread_snap->ths_ss_flags |= kThreadSuspended;
	}
	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
	}
#if CONFIG_EXCLAVES
	if ((thread->th_exclaves_state & TH_EXCLAVES_RPC) && stackshot_exclave_inspect_ctids && !panic_stackshot) {
		/* save exclave thread for later collection */
		if (stackshot_exclave_inspect_ctid_count < stackshot_exclave_inspect_ctid_capacity) {
			/* certain threads, like the collector, must never be inspected */
			if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) == 0) {
				stackshot_exclave_inspect_ctids[stackshot_exclave_inspect_ctid_count] = thread_get_ctid(thread);
				stackshot_exclave_inspect_ctid_count += 1;
				if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_STACKSHOT) != 0) {
					panic("stackshot: trying to inspect already-queued thread");
				}
			}
		}
	}
#endif /* CONFIG_EXCLAVES */
	if (thread_on_core) {
		cur_thread_snap->ths_ss_flags |= kThreadOnCore;
	}
	if (stackshot_thread_is_idle_worker_unsafe(thread)) {
		cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
	}

	/* make sure state flags defined in kcdata.h still match internal flags */
	static_assert(SS_TH_WAIT == TH_WAIT);
	static_assert(SS_TH_SUSP == TH_SUSP);
	static_assert(SS_TH_RUN == TH_RUN);
	static_assert(SS_TH_UNINT == TH_UNINT);
	static_assert(SS_TH_TERMINATE == TH_TERMINATE);
	static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
	static_assert(SS_TH_IDLE == TH_IDLE);

	cur_thread_snap->ths_last_run_time = thread->last_run_time;
	cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
	cur_thread_snap->ths_state = thread->state;
	cur_thread_snap->ths_sched_flags = thread->sched_flags;
	cur_thread_snap->ths_base_priority = thread->base_pri;
	cur_thread_snap->ths_sched_priority = thread->sched_pri;
	cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
	cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
	cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
	    thread->requested_policy.thrp_qos_workq_override);
	cur_thread_snap->ths_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
	cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);

	static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
	static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
	cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
	cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.cur_thsnap2_latency = mach_absolute_time() - latency_info.cur_thsnap2_latency;
	latency_info.thread_name_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* if there is thread name then add to buffer */
	cur_thread_name[0] = '\0';
	proc_threadname_kdp(get_bsdthread_info(thread), cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
	if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
		kdp_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.thread_name_latency = mach_absolute_time() - latency_info.thread_name_latency;
	latency_info.sur_times_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* record system, user, and runnable times */
	time_value_t runnable_time;
	thread_read_times(thread, NULL, NULL, &runnable_time);
	clock_sec_t user_sec = 0, system_sec = 0;
	clock_usec_t user_usec = 0, system_usec = 0;
	absolutetime_to_microtime(times.rtm_user, &user_sec, &user_usec);
	absolutetime_to_microtime(times.rtm_system, &system_sec, &system_usec);

	kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
	struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
	*stackshot_cpu_times = (struct stackshot_cpu_times_v2){
		.user_usec = user_sec * USEC_PER_SEC + user_usec,
		.system_usec = system_sec * USEC_PER_SEC + system_usec,
		.runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
	};

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.sur_times_latency = mach_absolute_time() - latency_info.sur_times_latency;
	latency_info.user_stack_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* Trace user stack, if any */
	if (!active_kthreads_only_p && task->active && task->map != kernel_map) {
		uint32_t user_ths_ss_flags = 0;

		/*
		 * This relies on knowing the "end" address points to the start of the
		 * next elements data and, in the case of arrays, the elements.
		 */
		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
		mach_vm_address_t max_addr = (mach_vm_address_t)kcd_max_address(kcd);
		assert(out_addr <= max_addr);
		size_t avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
		size_t max_frames = MIN(avail_frames, MAX_FRAMES);
		if (max_frames == 0) {
			error = KERN_RESOURCE_SHORTAGE;
			goto error_exit;
		}
		struct _stackshot_backtrace_context ctx = {
			.sbc_map = task->map,
			.sbc_allow_faulting = stack_enable_faulting,
			.sbc_prev_page = -1,
			.sbc_prev_kva = -1,
		};
		struct backtrace_control ctl = {
			.btc_user_thread = thread,
			.btc_user_copy = _stackshot_backtrace_copy,
			.btc_user_copy_context = &ctx,
		};
		struct backtrace_user_info info = BTUINFO_INIT;

		/* Backtrace is written straight into the unused tail of the kcdata
		 * buffer; the array header is allocated afterwards, below. */
		saved_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
		    &info);
		if (saved_count > 0) {
#if __LP64__
#define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR64
#else // __LP64__
#define STACKLR_WORDS STACKSHOT_KCTYPE_USER_STACKLR
#endif // !__LP64__
			mach_vm_address_t out_addr_array;
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
			    STACKLR_WORDS, sizeof(uintptr_t), saved_count,
			    &out_addr_array));
			/*
			 * Ensure the kcd_end_address (above) trick worked.
			 */
			assert(out_addr == out_addr_array);
			if (info.btui_info & BTI_64_BIT) {
				user_ths_ss_flags |= kUser64_p;
			}
			if ((info.btui_info & BTI_TRUNCATED) ||
			    (ctx.sbc_flags & kThreadTruncatedBT)) {
				user_ths_ss_flags |= kThreadTruncatedBT;
				user_ths_ss_flags |= kThreadTruncUserBT;
			}
			user_ths_ss_flags |= ctx.sbc_flags;
			/* Reset so the async walk below reports its own fault flags. */
			ctx.sbc_flags = 0;
#if __LP64__
			/* We only support async stacks on 64-bit kernels */
			if (info.btui_async_frame_addr != 0) {
				uint32_t async_start_offset = info.btui_async_start_index;
				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_USER_ASYNC_START_INDEX,
				    sizeof(async_start_offset), &async_start_offset));
				out_addr = (mach_vm_address_t)kcd_end_address(kcd);
				assert(out_addr <= max_addr);

				avail_frames = (max_addr - out_addr) / sizeof(uintptr_t);
				max_frames = MIN(avail_frames, MAX_FRAMES);
				if (max_frames == 0) {
					error = KERN_RESOURCE_SHORTAGE;
					goto error_exit;
				}
				ctl.btc_frame_addr = info.btui_async_frame_addr;
				ctl.btc_addr_offset = BTCTL_ASYNC_ADDR_OFFSET;
				info = BTUINFO_INIT;
				unsigned int async_count = backtrace_user((uintptr_t *)out_addr, max_frames, &ctl,
				    &info);
				if (async_count > 0) {
					mach_vm_address_t async_out_addr;
					kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd,
					    STACKSHOT_KCTYPE_USER_ASYNC_STACKLR64, sizeof(uintptr_t), async_count,
					    &async_out_addr));
					/*
					 * Ensure the kcd_end_address (above) trick worked.
					 */
					assert(out_addr == async_out_addr);
					if ((info.btui_info & BTI_TRUNCATED) ||
					    (ctx.sbc_flags & kThreadTruncatedBT)) {
						user_ths_ss_flags |= kThreadTruncatedBT;
						user_ths_ss_flags |= kThreadTruncUserAsyncBT;
					}
					user_ths_ss_flags |= ctx.sbc_flags;
				}
			}
#endif /* __LP64__ */
		}
		if (user_ths_ss_flags != 0) {
			cur_thread_snap->ths_ss_flags |= user_ths_ss_flags;
		}
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.user_stack_latency = mach_absolute_time() - latency_info.user_stack_latency;
	latency_info.kernel_stack_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* Call through to the machine specific trace routines
	 * Frames are added past the snapshot header.
	 */
	if (thread->kernel_stack != 0) {
		uint32_t kern_ths_ss_flags = 0;
		out_addr = (mach_vm_address_t)kcd_end_address(kcd);
#if defined(__LP64__)
		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR64;
		extern int machine_trace_thread64(thread_t thread, char *tracepos,
		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
		saved_count = machine_trace_thread64(
#else
		uint32_t stack_kcdata_type = STACKSHOT_KCTYPE_KERN_STACKLR;
		extern int machine_trace_thread(thread_t thread, char *tracepos,
		    char *tracebound, int nframes, uint32_t *thread_trace_flags);
		saved_count = machine_trace_thread(
#endif
			thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES,
			&kern_ths_ss_flags);
		if (saved_count > 0) {
			/* machine_trace_thread returns bytes, not frames. */
			int frame_size = sizeof(uintptr_t);
#if defined(__LP64__)
			cur_thread_snap->ths_ss_flags |= kKernel64_p;
#endif
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, stack_kcdata_type,
			    frame_size, saved_count / frame_size, &out_addr));
#if CONFIG_EXCLAVES
			if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
				struct thread_exclaves_info info = { 0 };

				info.tei_flags = kExclaveRPCActive;
				if (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) {
					info.tei_flags |= kExclaveSchedulerRequest;
				}
				if (thread->th_exclaves_state & TH_EXCLAVES_UPCALL) {
					info.tei_flags |= kExclaveUpcallActive;
				}
				info.tei_scid = thread->th_exclaves_scheduling_context_id;
				info.tei_thread_offset = kdp_exclave_stack_offset((uintptr_t *)out_addr, saved_count / frame_size);

				kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_KERN_EXCLAVES_THREADINFO, sizeof(struct thread_exclaves_info), &info));
			}
#endif /* CONFIG_EXCLAVES */
		}
		if (kern_ths_ss_flags & kThreadTruncatedBT) {
			kern_ths_ss_flags |= kThreadTruncKernBT;
		}
		if (kern_ths_ss_flags != 0) {
			cur_thread_snap->ths_ss_flags |= kern_ths_ss_flags;
		}
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.kernel_stack_latency = mach_absolute_time() - latency_info.kernel_stack_latency;
	latency_info.misc_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

#if CONFIG_THREAD_GROUPS
	if (trace_flags & STACKSHOT_THREAD_GROUP) {
		uint64_t thread_group_id = thread->thread_group ? thread_group_get_id(thread->thread_group) : 0;
		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_GROUP, sizeof(thread_group_id), &out_addr));
		kdp_memcpy((void*)out_addr, &thread_group_id, sizeof(uint64_t));
	}
#endif /* CONFIG_THREAD_GROUPS */

	if (collect_iostats) {
		kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
	}

#if CONFIG_PERVASIVE_CPI
	if (collect_instrs_cycles) {
		struct recount_usage usage = { 0 };
		recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
		    &usage);

		kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
		struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
		instrs_cycles->ics_instructions = recount_usage_instructions(&usage);
		instrs_cycles->ics_cycles = recount_usage_cycles(&usage);
	}
#endif /* CONFIG_PERVASIVE_CPI */

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
	if (collect_latency_info) {
		kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_LATENCY_INFO_THREAD, sizeof(latency_info), &latency_info));
	}
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

error_exit:
	return error;
}
3060
3061 static int
kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap,thread_t thread,boolean_t thread_on_core)3062 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
3063 {
3064 cur_thread_snap->tds_thread_id = thread_tid(thread);
3065 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
3066 cur_thread_snap->tds_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
3067 } else {
3068 cur_thread_snap->tds_voucher_identifier = 0;
3069 }
3070
3071 cur_thread_snap->tds_ss_flags = 0;
3072 if (thread->effective_policy.thep_darwinbg) {
3073 cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
3074 }
3075 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
3076 cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
3077 }
3078 if (thread->suspend_count > 0) {
3079 cur_thread_snap->tds_ss_flags |= kThreadSuspended;
3080 }
3081 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
3082 cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
3083 }
3084 if (thread_on_core) {
3085 cur_thread_snap->tds_ss_flags |= kThreadOnCore;
3086 }
3087 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
3088 cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
3089 }
3090
3091 cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
3092 cur_thread_snap->tds_state = thread->state;
3093 cur_thread_snap->tds_sched_flags = thread->sched_flags;
3094 cur_thread_snap->tds_base_priority = thread->base_pri;
3095 cur_thread_snap->tds_sched_priority = thread->sched_pri;
3096 cur_thread_snap->tds_eqos = thread->effective_policy.thep_qos;
3097 cur_thread_snap->tds_rqos = thread->requested_policy.thrp_qos;
3098 cur_thread_snap->tds_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
3099 thread->requested_policy.thrp_qos_workq_override);
3100 cur_thread_snap->tds_io_tier = (uint8_t) proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
3101
3102 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
3103 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
3104 cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
3105 cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
3106
3107 return 0;
3108 }
3109
3110 /*
3111 * Why 12? 12 strikes a decent balance between allocating a large array on
3112 * the stack and having large kcdata item overheads for recording nonrunable
3113 * tasks.
3114 */
#define UNIQUEIDSPERFLUSH 12

/* Small batch of task unique IDs, flushed to kcdata when full. */
struct saved_uniqueids {
	uint64_t ids[UNIQUEIDSPERFLUSH]; /* pending unique IDs */
	unsigned count;                  /* number of valid entries in ids[] */
};

/* Result of classify_thread(): which snapshot flavor a thread needs. */
enum thread_classification {
	tc_full_snapshot,  /* take a full snapshot */
	tc_delta_snapshot, /* take a delta snapshot */
};
3126
3127 static enum thread_classification
classify_thread(thread_t thread,boolean_t * thread_on_core_p,boolean_t collect_delta_stackshot)3128 classify_thread(thread_t thread, boolean_t * thread_on_core_p, boolean_t collect_delta_stackshot)
3129 {
3130 processor_t last_processor = thread->last_processor;
3131
3132 boolean_t thread_on_core = FALSE;
3133 if (last_processor != PROCESSOR_NULL) {
3134 /* Idle threads are always treated as on-core, since the processor state can change while they are running. */
3135 thread_on_core = (thread == last_processor->idle_thread) ||
3136 ((last_processor->state == PROCESSOR_SHUTDOWN || last_processor->state == PROCESSOR_RUNNING) &&
3137 last_processor->active_thread == thread);
3138 }
3139
3140 *thread_on_core_p = thread_on_core;
3141
3142 /* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
3143 * previous full stackshot */
3144 if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stack_snapshot_delta_since_timestamp)) {
3145 return tc_full_snapshot;
3146 } else {
3147 return tc_delta_snapshot;
3148 }
3149 }
3150
/* Per-stackshot parameters passed to kdp_stackshot_record_task(). */
struct stackshot_context {
	int pid;               /* target pid, or -1 for all tasks */
	uint64_t trace_flags;  /* STACKSHOT_* option flags for this capture */
	bool include_drivers;  /* whether driver (dext) tasks are recorded */
};
3156
/*
 * Record one task (and its threads) into the stackshot kcdata buffer.
 *
 * Handles both full and delta stackshots: for a delta stackshot, a thread
 * that has not run since the previous full stackshot is recorded as a
 * compact delta snapshot, and the task-level snapshot is likewise reduced
 * when no thread has run and the task predates the previous stackshot.
 * Tasks in teardown are recorded in a "transitioning task" container with
 * a minimal snapshot.
 *
 * Returns KERN_SUCCESS; KERN_FAILURE when a task/thread pointer fails KVA
 * validation; or whatever error the kcdata routines report via the
 * kcd_exit_on_error() macro (which jumps to error_exit).
 */
static kern_return_t
kdp_stackshot_record_task(struct stackshot_context *ctx, task_t task)
{
	boolean_t active_kthreads_only_p = ((ctx->trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
	boolean_t save_donating_pids_p = ((ctx->trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
	boolean_t collect_delta_stackshot = ((ctx->trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	boolean_t save_owner_info = ((ctx->trace_flags & STACKSHOT_THREAD_WAITINFO) != 0);

	kern_return_t error = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;
	int saved_count = 0;

	int task_pid = 0;
	uint64_t task_uniqueid = 0;
	int num_delta_thread_snapshots = 0;
	int num_waitinfo_threads = 0;
	int num_turnstileinfo_threads = 0;

	uint64_t task_start_abstime = 0;
	boolean_t have_map = FALSE, have_pmap = FALSE;
	boolean_t some_thread_ran = FALSE;
	unaligned_u64 task_snap_ss_flags = 0;
#if STACKSHOT_COLLECTS_LATENCY_INFO
	struct stackshot_latency_task latency_info;
	latency_info.setup_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
	uint64_t task_begin_cpu_cycle_count = 0;
	if (!panic_stackshot) {
		task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
	}
#endif

	/* Validate the task pointer before touching any of its fields. */
	if ((task == NULL) || !_stackshot_validate_kva((vm_offset_t)task, sizeof(struct task))) {
		error = KERN_FAILURE;
		goto error_exit;
	}

	void *bsd_info = get_bsdtask_info(task);
	boolean_t task_in_teardown = (bsd_info == NULL) || proc_in_teardown(bsd_info);// has P_LPEXIT set during proc_exit()
	boolean_t task_in_transition = task_in_teardown; // here we can add other types of transition.
	uint32_t container_type = (task_in_transition) ? STACKSHOT_KCCONTAINER_TRANSITIONING_TASK : STACKSHOT_KCCONTAINER_TASK;
	uint32_t transition_type = (task_in_teardown) ? kTaskIsTerminated : 0;

	/* Transitioning tasks always get the (minimal) full treatment below. */
	if (task_in_transition) {
		collect_delta_stackshot = FALSE;
	}

	have_map = (task->map != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map), sizeof(struct _vm_map)));
	have_pmap = have_map && (task->map->pmap != NULL) && (_stackshot_validate_kva((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));

	task_pid = pid_from_task(task);
	/* NOTE(review): pid_from_task() may return -1 for a terminating task;
	 * the task_pid == -1 case is filtered just below — confirm that is the
	 * intended handling for all terminating tasks. */
	task_uniqueid = get_task_uniqueid(task);

	if (!task->active || task_is_a_corpse(task) || task_is_a_corpse_fork(task)) {
		/*
		 * Not interested in terminated tasks without threads.
		 */
		if (queue_empty(&task->threads) || task_pid == -1) {
			return KERN_SUCCESS;
		}
	}

	/* All PIDs should have the MSB unset */
	assert((task_pid & (1ULL << 31)) == 0);

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
	latency_info.task_uniqueid = task_uniqueid;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* Trace everything, unless a process was specified. Add in driver tasks if requested. */
	if ((ctx->pid == -1) || (ctx->pid == task_pid) || (ctx->include_drivers && task_is_driver(task))) {
		/* add task snapshot marker */
		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
		    container_type, task_uniqueid));

		if (collect_delta_stackshot) {
			/*
			 * For delta stackshots we need to know if a thread from this task has run since the
			 * previous timestamp to decide whether we're going to record a full snapshot and UUID info.
			 */
			thread_t thread = THREAD_NULL;
			queue_iterate(&task->threads, thread, thread_t, task_threads)
			{
				if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
					error = KERN_FAILURE;
					goto error_exit;
				}

				if (active_kthreads_only_p && thread->kernel_stack == 0) {
					continue;
				}

				boolean_t thread_on_core;
				enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);

				switch (thread_classification) {
				case tc_full_snapshot:
					some_thread_ran = TRUE;
					break;
				case tc_delta_snapshot:
					num_delta_thread_snapshots++;
					break;
				}
			}
		}

		if (collect_delta_stackshot) {
			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &task_start_abstime);
		}

		/* Next record any relevant UUID info and store the task snapshot */
		if (task_in_transition ||
		    !collect_delta_stackshot ||
		    (task_start_abstime == 0) ||
		    (task_start_abstime > stack_snapshot_delta_since_timestamp) ||
		    some_thread_ran) {
			/*
			 * Collect full task information in these scenarios:
			 *
			 * 1) a full stackshot or the task is in transition
			 * 2) a delta stackshot where the task started after the previous full stackshot
			 * 3) a delta stackshot where any thread from the task has run since the previous full stackshot
			 *
			 * because the task may have exec'ed, changing its name, architecture, load info, etc
			 */

			kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, &task_snap_ss_flags));
			kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
#if STACKSHOT_COLLECTS_LATENCY_INFO
			if (!task_in_transition) {
				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags, &latency_info));
			} else {
				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
			}
#else
			if (!task_in_transition) {
				kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
			} else {
				kcd_exit_on_error(kcdata_record_transitioning_task_snapshot(stackshot_kcdata_p, task, task_snap_ss_flags, transition_type));
			}
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
		} else {
			kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
		}

#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.misc_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

		/* Pre-allocate the delta-snapshot array sized by the count taken above. */
		struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
		int current_delta_snapshot_index = 0;
		if (num_delta_thread_snapshots > 0) {
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
			    sizeof(struct thread_delta_snapshot_v3),
			    num_delta_thread_snapshots, &out_addr));
			delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
		}

#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.task_thread_count_loop_latency = mach_absolute_time();
#endif
		/*
		 * Iterate over the task threads to save thread snapshots and determine
		 * how much space we need for waitinfo and turnstile info
		 */
		thread_t thread = THREAD_NULL;
		queue_iterate(&task->threads, thread, thread_t, task_threads)
		{
			if ((thread == NULL) || !_stackshot_validate_kva((vm_offset_t)thread, sizeof(struct thread))) {
				error = KERN_FAILURE;
				goto error_exit;
			}

			uint64_t thread_uniqueid;
			if (active_kthreads_only_p && thread->kernel_stack == 0) {
				continue;
			}
			thread_uniqueid = thread_tid(thread);

			boolean_t thread_on_core;
			enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, collect_delta_stackshot);

			switch (thread_classification) {
			case tc_full_snapshot:
				/* add thread marker */
				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));

				/* thread snapshot can be large, including strings, avoid overflowing the stack. */
				kcdata_compression_window_open(stackshot_kcdata_p);

				kcd_exit_on_error(kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, ctx->trace_flags, have_pmap, thread_on_core));

				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));

				/* mark end of thread snapshot data */
				kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
				    STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
				break;
			case tc_delta_snapshot:
				kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++], thread, thread_on_core));
				break;
			}

			/*
			 * We want to report owner information regardless of whether a thread
			 * has changed since the last delta, whether it's a normal stackshot,
			 * or whether it's nonrunnable
			 */
			if (save_owner_info) {
				if (stackshot_thread_has_valid_waitinfo(thread)) {
					num_waitinfo_threads++;
				}

				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
					num_turnstileinfo_threads++;
				}
			}
		}
#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.task_thread_count_loop_latency = mach_absolute_time() - latency_info.task_thread_count_loop_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */


		thread_waitinfo_v2_t *thread_waitinfo = NULL;
		thread_turnstileinfo_v2_t *thread_turnstileinfo = NULL;
		int current_waitinfo_index = 0;
		int current_turnstileinfo_index = 0;
		/* allocate space for the wait and turnstile info */
		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
			/* thread waitinfo and turnstileinfo can be quite large, avoid overflowing the stack */
			kcdata_compression_window_open(stackshot_kcdata_p);

			if (num_waitinfo_threads > 0) {
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
				    sizeof(thread_waitinfo_v2_t), num_waitinfo_threads, &out_addr));
				thread_waitinfo = (thread_waitinfo_v2_t *)out_addr;
			}

			if (num_turnstileinfo_threads > 0) {
				/* get space for the turnstile info */
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
				    sizeof(thread_turnstileinfo_v2_t), num_turnstileinfo_threads, &out_addr));
				thread_turnstileinfo = (thread_turnstileinfo_v2_t *)out_addr;
			}

			stackshot_plh_resetgen(); // so we know which portlabel_ids are referenced
		}

#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.misc_latency = mach_absolute_time() - latency_info.misc_latency;
		latency_info.task_thread_data_loop_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

		/* Iterate over the task's threads to save the wait and turnstile info */
		queue_iterate(&task->threads, thread, thread_t, task_threads)
		{
			uint64_t thread_uniqueid;

			if (active_kthreads_only_p && thread->kernel_stack == 0) {
				continue;
			}

			thread_uniqueid = thread_tid(thread);

			/* If we want owner info, we should capture it regardless of its classification */
			if (save_owner_info) {
				if (stackshot_thread_has_valid_waitinfo(thread)) {
					stackshot_thread_wait_owner_info(
						thread,
						&thread_waitinfo[current_waitinfo_index++]);
				}

				if (stackshot_thread_has_valid_turnstileinfo(thread)) {
					stackshot_thread_turnstileinfo(
						thread,
						&thread_turnstileinfo[current_turnstileinfo_index++]);
				}
			}
		}

#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.task_thread_data_loop_latency = mach_absolute_time() - latency_info.task_thread_data_loop_latency;
		latency_info.misc2_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

#if DEBUG || DEVELOPMENT
		/* The second pass must have found exactly the counts the first pass reserved. */
		if (current_delta_snapshot_index != num_delta_thread_snapshots) {
			panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
			    num_delta_thread_snapshots, current_delta_snapshot_index);
		}
		if (current_waitinfo_index != num_waitinfo_threads) {
			panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
			    num_waitinfo_threads, current_waitinfo_index);
		}
#endif

		if (num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
			kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
			// now, record the portlabel hashes.
			kcd_exit_on_error(kdp_stackshot_plh_record());
		}

#if IMPORTANCE_INHERITANCE
		if (save_donating_pids_p) {
			/* Ensure there is room for TASK_IMP_WALK_LIMIT pids before walking. */
			kcd_exit_on_error(
				((((mach_vm_address_t)kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t))) <
				(mach_vm_address_t)kcd_max_address(stackshot_kcdata_p))
				? KERN_SUCCESS
				: KERN_RESOURCE_SHORTAGE));
			saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
			    (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
			if (saved_count > 0) {
				/* Variable size array - better not have it on the stack. */
				kcdata_compression_window_open(stackshot_kcdata_p);
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
				    sizeof(int32_t), saved_count, &out_addr));
				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
			}
		}
#endif

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
		if (!panic_stackshot) {
			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
			    "task_cpu_cycle_count"));
		}
#endif

#if STACKSHOT_COLLECTS_LATENCY_INFO
		latency_info.misc2_latency = mach_absolute_time() - latency_info.misc2_latency;
		if (collect_latency_info) {
			kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO_TASK, sizeof(latency_info), &latency_info));
		}
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

		/* mark end of task snapshot data */
		kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, container_type,
		    task_uniqueid));
	}


error_exit:
	return error;
}
3506
3507 /* Record global shared regions */
3508 static kern_return_t
kdp_stackshot_shared_regions(uint64_t trace_flags)3509 kdp_stackshot_shared_regions(uint64_t trace_flags)
3510 {
3511 kern_return_t error = KERN_SUCCESS;
3512
3513 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
3514 extern queue_head_t vm_shared_region_queue;
3515 vm_shared_region_t sr;
3516
3517 extern queue_head_t vm_shared_region_queue;
3518 queue_iterate(&vm_shared_region_queue,
3519 sr,
3520 vm_shared_region_t,
3521 sr_q) {
3522 struct dyld_shared_cache_loadinfo_v2 scinfo = {0};
3523 if (!_stackshot_validate_kva((vm_offset_t)sr, sizeof(*sr))) {
3524 break;
3525 }
3526 if (collect_delta_stackshot && sr->sr_install_time < stack_snapshot_delta_since_timestamp) {
3527 continue; // only include new shared caches in delta stackshots
3528 }
3529 uint32_t sharedCacheFlags = ((sr == primary_system_shared_region) ? kSharedCacheSystemPrimary : 0) |
3530 (sr->sr_driverkit ? kSharedCacheDriverkit : 0);
3531 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
3532 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
3533 kdp_memcpy(scinfo.sharedCacheUUID, sr->sr_uuid, sizeof(sr->sr_uuid));
3534 scinfo.sharedCacheSlide = sr->sr_slide;
3535 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_base_address + sr->sr_first_mapping;
3536 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
3537 scinfo.sharedCacheID = sr->sr_id;
3538 scinfo.sharedCacheFlags = sharedCacheFlags;
3539
3540 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_INFO,
3541 sizeof(scinfo), &scinfo));
3542
3543 if ((trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) && sr->sr_images != NULL &&
3544 _stackshot_validate_kva((vm_offset_t)sr->sr_images, sr->sr_images_count * sizeof(struct dyld_uuid_info_64))) {
3545 assert(sr->sr_images_count != 0);
3546 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
3547 }
3548 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
3549 STACKSHOT_KCCONTAINER_SHAREDCACHE, sr->sr_id));
3550 }
3551
3552 /*
3553 * For backwards compatibility; this will eventually be removed.
3554 * Another copy of the Primary System Shared Region, for older readers.
3555 */
3556 sr = primary_system_shared_region;
3557 /* record system level shared cache load info (if available) */
3558 if (!collect_delta_stackshot && sr &&
3559 _stackshot_validate_kva((vm_offset_t)sr, sizeof(struct vm_shared_region))) {
3560 struct dyld_shared_cache_loadinfo scinfo = {0};
3561
3562 /*
3563 * Historically, this data was in a dyld_uuid_info_64 structure, but the
3564 * naming of both the structure and fields for this use isn't great. The
3565 * dyld_shared_cache_loadinfo structure has better names, but the same
3566 * layout and content as the original.
3567 *
3568 * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
3569 * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
3570 * entries; here, it's the slid base address, and we leave it that way
3571 * for backwards compatibility.
3572 */
3573 kdp_memcpy(scinfo.sharedCacheUUID, &sr->sr_uuid, sizeof(sr->sr_uuid));
3574 scinfo.sharedCacheSlide = sr->sr_slide;
3575 scinfo.sharedCacheUnreliableSlidBaseAddress = sr->sr_slide + sr->sr_base_address;
3576 scinfo.sharedCacheSlidFirstMapping = sr->sr_base_address + sr->sr_first_mapping;
3577
3578 kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
3579 sizeof(scinfo), &scinfo));
3580
3581 if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
3582 /*
3583 * Include a map of the system shared cache layout if it has been populated
3584 * (which is only when the system is using a custom shared cache).
3585 */
3586 if (sr->sr_images && _stackshot_validate_kva((vm_offset_t)sr->sr_images,
3587 (sr->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
3588 assert(sr->sr_images_count != 0);
3589 kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), sr->sr_images_count, sr->sr_images));
3590 }
3591 }
3592 }
3593
3594 error_exit:
3595 return error;
3596 }
3597
/*
 * Top-level driver that formats an entire stackshot into the kcdata buffer
 * (stackshot_kcdata_p).  Runs in debugger/KDP context.
 *
 * pid          : target process, or -1 for all tasks.
 * trace_flags_p: in/out STACKSHOT_* flags; flags for unsupported features
 *                (e.g. thread groups, coalitions) are cleared and written
 *                back on success.
 *
 * Emits, in order: header/metadata, shared-region info, optional global
 * memory stats, optional thread groups, all live tasks, optional jetsam
 * coalitions, terminated tasks, fault stats, latency info, and the
 * stackshot duration.  Returns KERN_SUCCESS or the first error reported by
 * the kcdata routines (via kcd_exit_on_error, which jumps to error_exit).
 */
static kern_return_t
kdp_stackshot_kcdata_format(int pid, uint64_t * trace_flags_p)
{
	kern_return_t error = KERN_SUCCESS;
	mach_vm_address_t out_addr = 0;
	uint64_t abs_time = 0, abs_time_end = 0;
	uint64_t system_state_flags = 0;
	task_t task = TASK_NULL;
	mach_timebase_info_data_t timebase = {0, 0};
	uint32_t length_to_copy = 0, tmp32 = 0;
	abs_time = mach_absolute_time();
	uint64_t last_task_start_time = 0;
	uint64_t trace_flags = 0;

	if (!trace_flags_p) {
		panic("Invalid kdp_stackshot_kcdata_format trace_flags_p value");
	}
	trace_flags = *trace_flags_p;

#if STACKSHOT_COLLECTS_LATENCY_INFO
	struct stackshot_latency_collection latency_info;
#endif

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
	uint64_t stackshot_begin_cpu_cycle_count = 0;

	if (!panic_stackshot) {
		stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
	}
#endif

#if STACKSHOT_COLLECTS_LATENCY_INFO
	collect_latency_info = trace_flags & STACKSHOT_DISABLE_LATENCY_INFO ? false : true;
#endif
	/* process the flags */
	bool collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
	bool use_fault_path = ((trace_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
	stack_enable_faulting = (trace_flags & (STACKSHOT_ENABLE_BT_FAULTING));

	/* Currently we only support returning explicit KEXT load info on fileset kernels */
	kc_format_t primary_kc_type = KCFormatUnknown;
	if (PE_get_primary_kc_format(&primary_kc_type) && (primary_kc_type != KCFormatFileset)) {
		trace_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
	}

	struct stackshot_context ctx = {};
	ctx.trace_flags = trace_flags;
	ctx.pid = pid;
	ctx.include_drivers = (pid == 0 && (trace_flags & STACKSHOT_INCLUDE_DRIVER_THREADS_IN_KERNEL) != 0);

	if (use_fault_path) {
		fault_stats.sfs_pages_faulted_in = 0;
		fault_stats.sfs_time_spent_faulting = 0;
		fault_stats.sfs_stopped_faulting = (uint8_t) FALSE;
	}

	if (sizeof(void *) == 8) {
		system_state_flags |= kKernel64_p;
	}

	if (stackshot_kcdata_p == NULL) {
		error = KERN_INVALID_ARGUMENT;
		goto error_exit;
	}

	_stackshot_validation_reset();
#if CONFIG_EXCLAVES
	if (!panic_stackshot) {
		kcd_exit_on_error(stackshot_setup_exclave_waitlist(stackshot_kcdata_p)); /* Allocate list of exclave threads */
	}
#endif
	stackshot_plh_setup(stackshot_kcdata_p); /* set up port label hash */


	/* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
	clock_timebase_info(&timebase);

	/* begin saving data into the buffer */
	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
	kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
	kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
	if (trace_flags & STACKSHOT_PAGE_TABLES) {
		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stack_snapshot_pagetable_mask, "stackshot_pagetable_mask"));
	}
	if (stackshot_initial_estimate != 0) {
		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
		kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate_adj, "stackshot_size_estimate_adj"));
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.setup_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

#if CONFIG_JETSAM
	tmp32 = memorystatus_get_pressure_status_kdp();
	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &tmp32));
#endif

	/* Static system metadata is skipped in delta stackshots (it doesn't change). */
	if (!collect_delta_stackshot) {
		tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &tmp32));

		tmp32 = PAGE_SIZE;
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &tmp32));

		/* save boot-args and osversion string */
		length_to_copy = MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, (const void *)version));


		length_to_copy = MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, PE_boot_args()));

		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &timebase));
	} else {
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &stack_snapshot_delta_since_timestamp));
	}

	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &abs_time));

	kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_microsecs));

	kcd_exit_on_error(kdp_stackshot_shared_regions(trace_flags));

	/* Add requested information first */
	if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
		struct mem_and_io_snapshot mais = {0};
		kdp_mem_and_io_snapshot(&mais);
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(mais), &mais));
	}

#if CONFIG_THREAD_GROUPS
	struct thread_group_snapshot_v3 *thread_groups = NULL;
	int num_thread_groups = 0;

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
	uint64_t thread_group_begin_cpu_cycle_count = 0;

	if (!panic_stackshot && (trace_flags & STACKSHOT_THREAD_GROUP)) {
		thread_group_begin_cpu_cycle_count = mt_cur_cpu_cycles();
	}
#endif

	/* Iterate over thread group names */
	if (trace_flags & STACKSHOT_THREAD_GROUP) {
		/* Variable size array - better not have it on the stack. */
		kcdata_compression_window_open(stackshot_kcdata_p);

		/* If counting fails, drop the feature rather than failing the stackshot. */
		if (thread_group_iterate_stackshot(stackshot_thread_group_count, &num_thread_groups) != KERN_SUCCESS) {
			trace_flags &= ~(STACKSHOT_THREAD_GROUP);
		}

		if (num_thread_groups > 0) {
			kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_GROUP_SNAPSHOT, sizeof(struct thread_group_snapshot_v3), num_thread_groups, &out_addr));
			thread_groups = (struct thread_group_snapshot_v3 *)out_addr;
		}

		if (thread_group_iterate_stackshot(stackshot_thread_group_snapshot, thread_groups) != KERN_SUCCESS) {
			error = KERN_FAILURE;
			goto error_exit;
		}

		kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
	}

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
	if (!panic_stackshot && (thread_group_begin_cpu_cycle_count != 0)) {
		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - thread_group_begin_cpu_cycle_count),
		    "thread_groups_cpu_cycle_count"));
	}
#endif
#else
	trace_flags &= ~(STACKSHOT_THREAD_GROUP);
#endif /* CONFIG_THREAD_GROUPS */


#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.setup_latency = mach_absolute_time() - latency_info.setup_latency;
	latency_info.total_task_iteration_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* Targeting a single process (and not scanning for drivers) lets us stop early. */
	bool const process_scoped = (ctx.pid != -1) && !ctx.include_drivers;

	/* Iterate over tasks */
	queue_iterate(&tasks, task, task_t, tasks)
	{
		if (collect_delta_stackshot) {
			/* Track the latest task start time; used below to decide whether
			 * coalition data could have changed since the previous stackshot. */
			uint64_t abstime;
			proc_starttime_kdp(get_bsdtask_info(task), NULL, NULL, &abstime);

			if (abstime > last_task_start_time) {
				last_task_start_time = abstime;
			}
		}

		if (process_scoped && (pid_from_task(task) != ctx.pid)) {
			continue;
		}

		error = kdp_stackshot_record_task(&ctx, task);
		if (error) {
			goto error_exit;
		} else if (process_scoped) {
			/* Only targeting one process, we're done now. */
			break;
		}
	}


#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.total_task_iteration_latency = mach_absolute_time() - latency_info.total_task_iteration_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

#if CONFIG_COALITIONS
	/* Don't collect jetsam coalition snapshots in delta stackshots - these don't change */
	if (!collect_delta_stackshot || (last_task_start_time > stack_snapshot_delta_since_timestamp)) {
		int num_coalitions = 0;
		struct jetsam_coalition_snapshot *coalitions = NULL;

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
		uint64_t coalition_begin_cpu_cycle_count = 0;

		if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
			coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
		}
#endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */

		/* Iterate over coalitions */
		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
			/* If counting fails, drop the feature rather than failing the stackshot. */
			if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
				trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
			}
		}
		if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
			if (num_coalitions > 0) {
				/* Variable size array - better not have it on the stack. */
				kcdata_compression_window_open(stackshot_kcdata_p);
				kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
				coalitions = (struct jetsam_coalition_snapshot*)out_addr;

				if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
					error = KERN_FAILURE;
					goto error_exit;
				}

				kcd_exit_on_error(kcdata_compression_window_close(stackshot_kcdata_p));
			}
		}
#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
		if (!panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
			kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
			    "coalitions_cpu_cycle_count"));
		}
#endif /* SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI */
	}
#else
	trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
#endif /* CONFIG_COALITIONS */

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.total_terminated_task_iteration_latency = mach_absolute_time();
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/*
	 * Iterate over the tasks in the terminated tasks list. We only inspect
	 * tasks that have a valid bsd_info pointer. The check for task transition
	 * like past P_LPEXIT during proc_exit() is now checked for inside the
	 * kdp_stackshot_record_task(), and then a safer and minimal
	 * transitioning_task_snapshot struct is collected via
	 * kcdata_record_transitioning_task_snapshot()
	 */
	queue_iterate(&terminated_tasks, task, task_t, tasks)
	{
		error = kdp_stackshot_record_task(&ctx, task);
		if (error) {
			goto error_exit;
		}
	}
#if DEVELOPMENT || DEBUG
	kcd_exit_on_error(kdp_stackshot_plh_stats());
#endif /* DEVELOPMENT || DEBUG */

#if STACKSHOT_COLLECTS_LATENCY_INFO
	latency_info.total_terminated_task_iteration_latency = mach_absolute_time() - latency_info.total_terminated_task_iteration_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	if (use_fault_path) {
		kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
		    sizeof(struct stackshot_fault_stats), &fault_stats);
	}

#if STACKSHOT_COLLECTS_LATENCY_INFO
	if (collect_latency_info) {
		latency_info.latency_version = 1;
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_LATENCY_INFO, sizeof(latency_info), &latency_info));
	}
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */

	/* update timestamp of the stackshot */
	abs_time_end = mach_absolute_time();
	struct stackshot_duration_v2 stackshot_duration = {
		.stackshot_duration = (abs_time_end - abs_time),
		.stackshot_duration_outer = 0,
		.stackshot_duration_prior = stackshot_duration_prior_abs,
	};

	/* In the uncompressed case, keep a pointer so the caller can patch in the
	 * outer duration after the fact; compressed data can't be patched. */
	if ((trace_flags & STACKSHOT_DO_COMPRESS) == 0) {
		kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
		    sizeof(struct stackshot_duration_v2), &out_addr));
		struct stackshot_duration_v2 *duration_p = (void *) out_addr;
		kdp_memcpy(duration_p, &stackshot_duration, sizeof(*duration_p));
		stackshot_duration_outer = (unaligned_u64 *)&duration_p->stackshot_duration_outer;
	} else {
		kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_duration), &stackshot_duration));
		stackshot_duration_outer = NULL;
	}

#if SCHED_HYGIENE_DEBUG && CONFIG_PERVASIVE_CPI
	if (!panic_stackshot) {
		kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
		    "stackshot_total_cpu_cycle_cnt"));
	}
#endif

#if CONFIG_EXCLAVES
	/* Avoid setting AST until as late as possible, in case the stackshot fails */
	commit_exclaves_ast();
#endif

	*trace_flags_p = trace_flags;

error_exit:;

#if CONFIG_EXCLAVES
	if (error != KERN_SUCCESS && stackshot_exclave_inspect_ctids) {
		/* Clear inspection CTID list: no need to wait for these threads */
		stackshot_exclave_inspect_ctid_count = 0;
		stackshot_exclave_inspect_ctid_capacity = 0;
		stackshot_exclave_inspect_ctids = NULL;
	}
#endif

#if SCHED_HYGIENE_DEBUG
	bool disable_interrupts_masked_check = kern_feature_override(
		KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
	    (trace_flags & STACKSHOT_DO_COMPRESS) != 0;

#if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
	disable_interrupts_masked_check = true;
#endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */

	if (disable_interrupts_masked_check) {
		ml_spin_debug_clear_self();
	}

	if (!panic_stackshot && interrupt_masked_debug_mode) {
		/*
		 * Try to catch instances where stackshot takes too long BEFORE returning from
		 * the debugger
		 */
		ml_handle_stackshot_interrupt_disabled_duration(current_thread());
	}
#endif /* SCHED_HYGIENE_DEBUG */
	stackshot_plh_reset();
	stack_enable_faulting = FALSE;

	return error;
}
3966
3967 static uint64_t
proc_was_throttled_from_task(task_t task)3968 proc_was_throttled_from_task(task_t task)
3969 {
3970 uint64_t was_throttled = 0;
3971 void *bsd_info = get_bsdtask_info(task);
3972
3973 if (bsd_info) {
3974 was_throttled = proc_was_throttled(bsd_info);
3975 }
3976
3977 return was_throttled;
3978 }
3979
3980 static uint64_t
proc_did_throttle_from_task(task_t task)3981 proc_did_throttle_from_task(task_t task)
3982 {
3983 uint64_t did_throttle = 0;
3984 void *bsd_info = get_bsdtask_info(task);
3985
3986 if (bsd_info) {
3987 did_throttle = proc_did_throttle(bsd_info);
3988 }
3989
3990 return did_throttle;
3991 }
3992
3993 static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot * memio_snap)3994 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
3995 {
3996 unsigned int pages_reclaimed;
3997 unsigned int pages_wanted;
3998 kern_return_t kErr;
3999
4000 uint64_t compressions = 0;
4001 uint64_t decompressions = 0;
4002
4003 compressions = counter_load(&vm_statistics_compressions);
4004 decompressions = counter_load(&vm_statistics_decompressions);
4005
4006 memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
4007 memio_snap->free_pages = vm_page_free_count;
4008 memio_snap->active_pages = vm_page_active_count;
4009 memio_snap->inactive_pages = vm_page_inactive_count;
4010 memio_snap->purgeable_pages = vm_page_purgeable_count;
4011 memio_snap->wired_pages = vm_page_wire_count;
4012 memio_snap->speculative_pages = vm_page_speculative_count;
4013 memio_snap->throttled_pages = vm_page_throttled_count;
4014 memio_snap->busy_buffer_count = count_busy_buffers();
4015 memio_snap->filebacked_pages = vm_page_pageable_external_count;
4016 memio_snap->compressions = (uint32_t)compressions;
4017 memio_snap->decompressions = (uint32_t)decompressions;
4018 memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
4019 kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
4020
4021 if (!kErr) {
4022 memio_snap->pages_wanted = (uint32_t)pages_wanted;
4023 memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
4024 memio_snap->pages_wanted_reclaimed_valid = 1;
4025 } else {
4026 memio_snap->pages_wanted = 0;
4027 memio_snap->pages_reclaimed = 0;
4028 memio_snap->pages_wanted_reclaimed_valid = 0;
4029 }
4030 }
4031
4032 static vm_offset_t
stackshot_find_phys(vm_map_t map,vm_offset_t target_addr,kdp_fault_flags_t fault_flags,uint32_t * kdp_fault_result_flags)4033 stackshot_find_phys(vm_map_t map, vm_offset_t target_addr, kdp_fault_flags_t fault_flags, uint32_t *kdp_fault_result_flags)
4034 {
4035 vm_offset_t result;
4036 struct kdp_fault_result fault_results = {0};
4037 if (fault_stats.sfs_stopped_faulting) {
4038 fault_flags &= ~KDP_FAULT_FLAGS_ENABLE_FAULTING;
4039 }
4040
4041 result = kdp_find_phys(map, target_addr, fault_flags, &fault_results);
4042
4043 if ((fault_results.flags & KDP_FAULT_RESULT_TRIED_FAULT) || (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN)) {
4044 fault_stats.sfs_time_spent_faulting += fault_results.time_spent_faulting;
4045
4046 if ((fault_stats.sfs_time_spent_faulting >= fault_stats.sfs_system_max_fault_time) && !panic_stackshot) {
4047 fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
4048 }
4049 }
4050
4051 if (fault_results.flags & KDP_FAULT_RESULT_FAULTED_IN) {
4052 fault_stats.sfs_pages_faulted_in++;
4053 }
4054
4055 if (kdp_fault_result_flags) {
4056 *kdp_fault_result_flags = fault_results.flags;
4057 }
4058
4059 return result;
4060 }
4061
4062 /*
4063 * Wrappers around kdp_generic_copyin, kdp_generic_copyin_word, kdp_generic_copyin_string that use stackshot_find_phys
4064 * in order to:
4065 * 1. collect statistics on the number of pages faulted in
4066 * 2. stop faulting if the time spent faulting has exceeded the limit.
4067 */
4068 static boolean_t
stackshot_copyin(vm_map_t map,uint64_t uaddr,void * dest,size_t size,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)4069 stackshot_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4070 {
4071 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4072 if (try_fault) {
4073 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4074 }
4075 return kdp_generic_copyin(map, uaddr, dest, size, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
4076 }
4077 static boolean_t
stackshot_copyin_word(task_t task,uint64_t addr,uint64_t * result,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)4078 stackshot_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4079 {
4080 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4081 if (try_fault) {
4082 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4083 }
4084 return kdp_generic_copyin_word(task, addr, result, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags) == KERN_SUCCESS;
4085 }
4086 static int
stackshot_copyin_string(task_t task,uint64_t addr,char * buf,int buf_sz,boolean_t try_fault,kdp_fault_result_flags_t * kdp_fault_result_flags)4087 stackshot_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, kdp_fault_result_flags_t *kdp_fault_result_flags)
4088 {
4089 kdp_fault_flags_t fault_flags = KDP_FAULT_FLAGS_NONE;
4090 if (try_fault) {
4091 fault_flags |= KDP_FAULT_FLAGS_ENABLE_FAULTING;
4092 }
4093 return kdp_generic_copyin_string(task, addr, buf, buf_sz, fault_flags, (find_phys_fn_t)stackshot_find_phys, kdp_fault_result_flags);
4094 }
4095
/*
 * Take a stackshot (called from the debugger/panic context).
 *
 * kdp_snapshot is incremented for the duration of the snapshot so other
 * kernel code can detect that a stackshot is in progress; it is restored
 * before returning.  The result and the (possibly updated) flags are
 * stored in the stack_snapshot_ret / stackshot_out_flags globals.
 */
kern_return_t
do_stackshot(void *context)
{
#pragma unused(context)
	kdp_snapshot++;

	/* Start from the requested flags; the formatter may modify them. */
	stackshot_out_flags = stack_snapshot_flags;

	stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid, &stackshot_out_flags);

	kdp_snapshot--;
	return stack_snapshot_ret;
}
4109
4110 kern_return_t
4111 do_panic_stackshot(void *context);
4112
4113 kern_return_t
do_panic_stackshot(void * context)4114 do_panic_stackshot(void *context)
4115 {
4116 kern_return_t ret = do_stackshot(context);
4117 kern_return_t error = finalize_kcdata(stackshot_kcdata_p);
4118
4119 // Return ret if it's already an error, error otherwise. Usually both
4120 // are KERN_SUCCESS.
4121 return (ret != KERN_SUCCESS) ? ret : error;
4122 }
4123
4124 boolean_t
stackshot_thread_is_idle_worker_unsafe(thread_t thread)4125 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
4126 {
4127 /* When the pthread kext puts a worker thread to sleep, it will
4128 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
4129 * struct. See parkit() in kern/kern_support.c in libpthread.
4130 */
4131 return (thread->state & TH_WAIT) &&
4132 (thread->block_hint == kThreadWaitParkedWorkQueue);
4133 }
4134
4135 #if CONFIG_COALITIONS
4136 static void
stackshot_coalition_jetsam_count(void * arg,int i,coalition_t coal)4137 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
4138 {
4139 #pragma unused(i, coal)
4140 unsigned int *coalition_count = (unsigned int*)arg;
4141 (*coalition_count)++;
4142 }
4143
4144 static void
stackshot_coalition_jetsam_snapshot(void * arg,int i,coalition_t coal)4145 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
4146 {
4147 if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
4148 return;
4149 }
4150
4151 struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
4152 struct jetsam_coalition_snapshot *jcs = &coalitions[i];
4153 task_t leader = TASK_NULL;
4154 jcs->jcs_id = coalition_id(coal);
4155 jcs->jcs_flags = 0;
4156 jcs->jcs_thread_group = 0;
4157
4158 if (coalition_term_requested(coal)) {
4159 jcs->jcs_flags |= kCoalitionTermRequested;
4160 }
4161 if (coalition_is_terminated(coal)) {
4162 jcs->jcs_flags |= kCoalitionTerminated;
4163 }
4164 if (coalition_is_reaped(coal)) {
4165 jcs->jcs_flags |= kCoalitionReaped;
4166 }
4167 if (coalition_is_privileged(coal)) {
4168 jcs->jcs_flags |= kCoalitionPrivileged;
4169 }
4170
4171 #if CONFIG_THREAD_GROUPS
4172 struct thread_group *thread_group = kdp_coalition_get_thread_group(coal);
4173 if (thread_group) {
4174 jcs->jcs_thread_group = thread_group_get_id(thread_group);
4175 }
4176 #endif /* CONFIG_THREAD_GROUPS */
4177
4178 leader = kdp_coalition_get_leader(coal);
4179 if (leader) {
4180 jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
4181 } else {
4182 jcs->jcs_leader_task_uniqueid = 0;
4183 }
4184 }
4185 #endif /* CONFIG_COALITIONS */
4186
4187 #if CONFIG_THREAD_GROUPS
4188 static void
stackshot_thread_group_count(void * arg,int i,struct thread_group * tg)4189 stackshot_thread_group_count(void *arg, int i, struct thread_group *tg)
4190 {
4191 #pragma unused(i, tg)
4192 unsigned int *n = (unsigned int*)arg;
4193 (*n)++;
4194 }
4195
/*
 * Thread-group-iteration callback: fill slot i of the thread group
 * snapshot array (arg) with the group's id, name, and flags.
 *
 * The group name is split across two fixed-size fields (tgs_name and
 * tgs_name_cont); the static_assert guarantees the source name buffer
 * (THREAD_GROUP_MAXNAME) is longer than tgs_name, so reading past
 * tgs_name bytes of `name` is in bounds.
 * NOTE(review): the copy into tgs_name_cont assumes THREAD_GROUP_MAXNAME
 * >= sizeof(tgs_name) + sizeof(tgs_name_cont) — confirm against the
 * struct definitions; the assert only checks the weaker `>` condition.
 */
static void
stackshot_thread_group_snapshot(void *arg, int i, struct thread_group *tg)
{
	struct thread_group_snapshot_v3 *thread_groups = arg;
	struct thread_group_snapshot_v3 *tgs = &thread_groups[i];
	const char *name = thread_group_get_name(tg);
	uint32_t flags = thread_group_get_flags(tg);
	tgs->tgs_id = thread_group_get_id(tg);
	static_assert(THREAD_GROUP_MAXNAME > sizeof(tgs->tgs_name));
	kdp_memcpy(tgs->tgs_name, name, sizeof(tgs->tgs_name));
	kdp_memcpy(tgs->tgs_name_cont, name + sizeof(tgs->tgs_name),
	    sizeof(tgs->tgs_name_cont));
	/* Translate kernel thread-group flags into snapshot flag bits. */
	tgs->tgs_flags =
	    ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
	    ((flags & THREAD_GROUP_FLAGS_APPLICATION) ? kThreadGroupApplication : 0) |
	    ((flags & THREAD_GROUP_FLAGS_CRITICAL) ? kThreadGroupCritical : 0) |
	    ((flags & THREAD_GROUP_FLAGS_BEST_EFFORT) ? kThreadGroupBestEffort : 0) |
	    ((flags & THREAD_GROUP_FLAGS_UI_APP) ? kThreadGroupUIApplication : 0) |
	    ((flags & THREAD_GROUP_FLAGS_MANAGED) ? kThreadGroupManaged : 0) |
	    ((flags & THREAD_GROUP_FLAGS_STRICT_TIMERS) ? kThreadGroupStrictTimers : 0) |
	    0;
}
4218 #endif /* CONFIG_THREAD_GROUPS */
4219
4220 /* Determine if a thread has waitinfo that stackshot can provide */
4221 static int
stackshot_thread_has_valid_waitinfo(thread_t thread)4222 stackshot_thread_has_valid_waitinfo(thread_t thread)
4223 {
4224 if (!(thread->state & TH_WAIT)) {
4225 return 0;
4226 }
4227
4228 switch (thread->block_hint) {
4229 // If set to None or is a parked work queue, ignore it
4230 case kThreadWaitParkedWorkQueue:
4231 case kThreadWaitNone:
4232 return 0;
4233 // There is a short window where the pthread kext removes a thread
4234 // from its ksyn wait queue before waking the thread up
4235 case kThreadWaitPThreadMutex:
4236 case kThreadWaitPThreadRWLockRead:
4237 case kThreadWaitPThreadRWLockWrite:
4238 case kThreadWaitPThreadCondVar:
4239 return kdp_pthread_get_thread_kwq(thread) != NULL;
4240 // All other cases are valid block hints if in a wait state
4241 default:
4242 return 1;
4243 }
4244 }
4245
4246 /* Determine if a thread has turnstileinfo that stackshot can provide */
4247 static int
stackshot_thread_has_valid_turnstileinfo(thread_t thread)4248 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
4249 {
4250 struct turnstile *ts = thread_get_waiting_turnstile(thread);
4251
4252 return stackshot_thread_has_valid_waitinfo(thread) &&
4253 ts != TURNSTILE_NULL;
4254 }
4255
4256 static void
stackshot_thread_turnstileinfo(thread_t thread,thread_turnstileinfo_v2_t * tsinfo)4257 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_v2_t *tsinfo)
4258 {
4259 struct turnstile *ts;
4260 struct ipc_service_port_label *ispl = NULL;
4261
4262 /* acquire turnstile information and store it in the stackshot */
4263 ts = thread_get_waiting_turnstile(thread);
4264 tsinfo->waiter = thread_tid(thread);
4265 kdp_turnstile_fill_tsinfo(ts, tsinfo, &ispl);
4266 tsinfo->portlabel_id = stackshot_plh_lookup(ispl,
4267 (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_SENDPORT) ? STACKSHOT_PLH_LOOKUP_SEND :
4268 (tsinfo->turnstile_flags & STACKSHOT_TURNSTILE_STATUS_RECEIVEPORT) ? STACKSHOT_PLH_LOOKUP_RECEIVE :
4269 STACKSHOT_PLH_LOOKUP_UNKNOWN);
4270 }
4271
/*
 * Fill in waitinfo for a blocked thread: record the waiter's tid, the
 * kind of primitive it is blocked on (from its block_hint), and dispatch
 * to the matching kdp_*_find_owner() helper to identify the owner.
 *
 * Several helpers still take the v1 waitinfo layout, so the v2 struct is
 * passed through a v1 cast for them — this presumably relies on v2 being
 * a prefix-compatible extension of v1 (layouts defined elsewhere).
 * Port send/receive waits additionally resolve a service-port label into
 * a port-label id via the stackshot port-label hash.
 */
static void
stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_v2_t *waitinfo)
{
	thread_waitinfo_t *waitinfo_v1 = (thread_waitinfo_t *)waitinfo;
	struct ipc_service_port_label *ispl = NULL;

	waitinfo->waiter = thread_tid(thread);
	waitinfo->wait_type = thread->block_hint;
	waitinfo->wait_flags = 0;

	switch (waitinfo->wait_type) {
	case kThreadWaitKernelMutex:
		kdp_lck_mtx_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitPortReceive:
		/* Port waits also record which service port was involved. */
		kdp_mqueue_recv_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
		waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_RECEIVE);
		break;
	case kThreadWaitPortSend:
		kdp_mqueue_send_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo, &ispl);
		waitinfo->portlabel_id = stackshot_plh_lookup(ispl, STACKSHOT_PLH_LOOKUP_SEND);
		break;
	case kThreadWaitSemaphore:
		kdp_sema_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitUserLock:
		kdp_ulock_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitKernelRWLockRead:
	case kThreadWaitKernelRWLockWrite:
	case kThreadWaitKernelRWLockUpgrade:
		kdp_rwlck_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitPThreadMutex:
	case kThreadWaitPThreadRWLockRead:
	case kThreadWaitPThreadRWLockWrite:
	case kThreadWaitPThreadCondVar:
		kdp_pthread_find_owner(thread, waitinfo_v1);
		break;
	case kThreadWaitWorkloopSyncWait:
		kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitOnProcess:
		kdp_wait4_find_process(thread, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitSleepWithInheritor:
		kdp_sleep_with_inheritor_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitEventlink:
		kdp_eventlink_find_owner(thread->waitq.wq_q, thread->wait_event, waitinfo_v1);
		break;
	case kThreadWaitCompressor:
		kdp_compressor_busy_find_owner(thread->wait_event, waitinfo_v1);
		break;
	default:
		/* Unknown hint: no owner information to report. */
		waitinfo->owner = 0;
		waitinfo->context = 0;
		break;
	}
}
4332