1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/kdebug_common.h>
24
25 LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
26 int kdbg_debug = 0;
27
28 extern struct kd_ctrl_page_t kd_ctrl_page_trace, kd_ctrl_page_triage;
29
30 void
kdebug_lck_init(void)31 kdebug_lck_init(void)
32 {
33 lck_spin_init(&kd_ctrl_page_trace.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
34 lck_spin_init(&kd_ctrl_page_triage.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
35 }
36
37 int
kdebug_storage_lock(struct kd_ctrl_page_t * kd_ctrl_page)38 kdebug_storage_lock(struct kd_ctrl_page_t *kd_ctrl_page)
39 {
40 int intrs_en = ml_set_interrupts_enabled(false);
41 lck_spin_lock_grp(&kd_ctrl_page->kds_spin_lock, &kdebug_lck_grp);
42 return intrs_en;
43 }
44
45 void
kdebug_storage_unlock(struct kd_ctrl_page_t * kd_ctrl_page,int intrs_en)46 kdebug_storage_unlock(struct kd_ctrl_page_t *kd_ctrl_page, int intrs_en)
47 {
48 lck_spin_unlock(&kd_ctrl_page->kds_spin_lock);
49 ml_set_interrupts_enabled(intrs_en);
50 }
51
52 extern int kernel_debug_trace_write_to_file(user_addr_t *buffer, size_t *number, size_t *count, size_t tempbuf_number, vnode_t vp, vfs_context_t ctx, uint32_t file_version);
53
/*
 * Return the number of CPUs to size the per-CPU trace buffers for.
 *
 * When tracing starts early in boot (`early_trace`), the full CPU topology
 * may not be known yet, so a platform-specific upper bound is used instead
 * of the enumerated count.
 */
uint32_t
kdbg_cpu_count(bool early_trace)
{
	if (early_trace) {
#if defined(__x86_64__)
		/* Boot-time maximum, available before CPUs are enumerated. */
		return max_ncpus;
#else /* defined(__x86_64__) */
		return ml_get_cpu_count();
#endif /* !defined(__x86_64__) */
	}

#if defined(__x86_64__)
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	assert(hinfo.logical_cpu_max > 0);
	return hinfo.logical_cpu_max;
#else /* defined(__x86_64__) */
	/* Buffers are indexed by CPU ID, so size by max ID + 1, not the count. */
	return ml_get_topology_info()->max_cpu_id + 1;
#endif /* !defined(__x86_64__) */
}
75
76 /*
77 * Both kdebug_timestamp and kdebug_using_continuous_time are known
78 * to kexts. And going forward we always want to use mach_continuous_time().
79 * So we keep these 2 routines as-is to keep the TRACE mode use outside
80 * the kernel intact. TRIAGE mode will explicitly only use mach_continuous_time()
81 * for its timestamp.
82 */
83 bool
kdebug_using_continuous_time(void)84 kdebug_using_continuous_time(void)
85 {
86 return kd_ctrl_page_trace.kdebug_flags & KDBG_CONTINUOUS_TIME;
87 }
88
89 uint64_t
kdebug_timestamp(void)90 kdebug_timestamp(void)
91 {
92 if (kdebug_using_continuous_time()) {
93 return mach_continuous_time();
94 } else {
95 return mach_absolute_time();
96 }
97 }
98
/*
 * Allocate and initialize the trace storage for a control page: the per-CPU
 * kd_bufinfo array, the copy-out scratch buffer, and the storage buffers
 * holding kd_data_page->n_storage_units storage units (updated on return to
 * the number actually created).  All storage units are threaded onto the
 * control page's free list.
 *
 * Returns 0 on success or ENOSPC; on failure all partial allocations are
 * released via delete_buffers().
 */
int
create_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_storage_buffers *kd_bufs = NULL;
	int n_storage_units = kd_data_page->n_storage_units;

	ncpus = kd_ctrl_page->kdebug_cpus;

	/* One kd_bufinfo per CPU, zero-filled. */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip,
	    sizeof(struct kd_bufinfo) * ncpus,
	    KMA_ZERO | KMA_DATA, tag) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdbip = kdbip;

	/*
	 * Storage units are grouped into buffers of N_STORAGE_UNITS_PER_BUFFER;
	 * a final partial buffer (p_buffer_size) holds the remainder.
	 */
	f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->n_storage_buffer = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->n_storage_buffer++;
	}

	/* Scratch buffer used when merging/copying events out (kept across calls). */
	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t) kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	/* Descriptor array: one entry per storage buffer. */
	kd_bufs = kalloc_type_tag(struct kd_storage_buffers, kd_data_page->n_storage_buffer,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	/* Full-size buffers first... */
	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdsb_size = f_buffer_size;
	}
	/* ...then the trailing partial buffer, if any (i == f_buffers here). */
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdsb_size = p_buffer_size;
	}

	/* Thread every storage unit onto the free list (LIFO push). */
	count_storage_units = 0;
	for (i = 0; i < kd_data_page->n_storage_buffer; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdsb_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdsb_addr;

		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	/* Record how many units were actually created. */
	kd_data_page->n_storage_units = count_storage_units;

	/* Reset every CPU's storage-unit list. */
	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdebug_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kds_inuse_count = 0;
out:
	if (error) {
		/* Unwind any partial allocations. */
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}
209
/*
 * Free everything create_buffers() allocated and reset the control page's
 * buffer-related state.  Safe to call on a partially-constructed data page
 * (used as the error-unwind path of create_buffers()).
 */
void
delete_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page)
{
	unsigned int i;
	int n_storage_buffer = kd_data_page->n_storage_buffer;

	struct kd_bufinfo *kdbip = kd_data_page->kdbip;
	struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		/* Free each storage buffer, then the descriptor array itself. */
		for (i = 0; i < n_storage_buffer; i++) {
			if (kd_bufs[i].kdsb_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
			}
		}
		kfree_type(struct kd_storage_buffers, n_storage_buffer, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->n_storage_buffer = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	/* The free list pointed into the buffers just freed. */
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page->kdebug_cpus);
		kd_data_page->kdbip = NULL;
	}
	kd_ctrl_page->kdebug_iops = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->kdebug_flags &= ~KDBG_BUFINIT;
}
245
246 bool
allocate_storage_unit(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,int cpu)247 allocate_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu)
248 {
249 union kds_ptr kdsp;
250 struct kd_storage *kdsp_actual, *kdsp_next_actual;
251 struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
252 uint64_t oldest_ts, ts;
253 bool retval = true;
254 struct kd_storage_buffers *kd_bufs;
255
256 int intrs_en = kdebug_storage_lock(kd_ctrl_page);
257
258 kdbp = &kd_data_page->kdbip[cpu];
259 kd_bufs = kd_data_page->kd_bufs;
260 kdbip = kd_data_page->kdbip;
261
262 /* If someone beat us to the allocate, return success */
263 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
264 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);
265
266 if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
267 goto out;
268 }
269 }
270
271 if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
272 /*
273 * If there's a free page, grab it from the free list.
274 */
275 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
276 kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;
277
278 kd_ctrl_page->kds_inuse_count++;
279 } else {
280 /*
281 * Otherwise, we're going to lose events and repurpose the oldest
282 * storage unit we can find.
283 */
284 if (kd_ctrl_page->kdebug_flags & KDBG_NOWRAP) {
285 kd_ctrl_page->kdebug_slowcheck |= SLOW_NOLOG;
286 kdbp->kd_lostevents = true;
287 retval = false;
288 goto out;
289 }
290 kdbp_vict = NULL;
291 oldest_ts = UINT64_MAX;
292
293 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
294 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
295 /*
296 * no storage unit to steal
297 */
298 continue;
299 }
300
301 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);
302
303 if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
304 /*
305 * make sure we don't steal the storage unit
306 * being actively recorded to... need to
307 * move on because we don't want an out-of-order
308 * set of events showing up later
309 */
310 continue;
311 }
312
313 /*
314 * When wrapping, steal the storage unit with the
315 * earliest timestamp on its last event, instead of the
316 * earliest timestamp on the first event. This allows a
317 * storage unit with more recent events to be preserved,
318 * even if the storage unit contains events that are
319 * older than those found in other CPUs.
320 */
321 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
322 if (ts < oldest_ts) {
323 oldest_ts = ts;
324 kdbp_vict = kdbp_try;
325 }
326 }
327 if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
328 kdebug_enable = 0;
329 kd_ctrl_page->enabled = 0;
330 commpage_update_kdebug_state();
331 retval = false;
332 goto out;
333 }
334 kdsp = kdbp_vict->kd_list_head;
335 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
336 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
337
338 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
339 kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
340 kdsp_next_actual->kds_lostevents = true;
341 } else {
342 kdbp_vict->kd_lostevents = true;
343 }
344
345 if (kd_ctrl_page->oldest_time < oldest_ts) {
346 kd_ctrl_page->oldest_time = oldest_ts;
347 }
348 kd_ctrl_page->kdebug_flags |= KDBG_WRAPPED;
349 }
350
351 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
352 kdsp_actual->kds_timestamp = kdebug_timestamp();
353 } else {
354 kdsp_actual->kds_timestamp = mach_continuous_time();
355 }
356
357 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
358 kdsp_actual->kds_bufcnt = 0;
359 kdsp_actual->kds_readlast = 0;
360
361 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
362 kdbp->kd_lostevents = false;
363 kdsp_actual->kds_bufindx = 0;
364
365 if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
366 kdbp->kd_list_head = kdsp;
367 } else {
368 POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
369 }
370 kdbp->kd_list_tail = kdsp;
371 out:
372 kdebug_storage_unlock(kd_ctrl_page, intrs_en);
373
374 return retval;
375 }
376
/*
 * Return the storage unit `kdsp_raw` -- expected to be the head of `cpu`'s
 * list -- to the control page's free list.  If the unit is no longer the
 * list head, it was already stolen by allocate_storage_unit(), and there is
 * nothing to do.
 */
void
release_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu, uint32_t kdsp_raw)
{
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdbp = &kd_data_page->kdbip[cpu];

	kdsp.raw = kdsp_raw;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * it's possible for the storage unit pointed to
		 * by kdsp to have already been stolen... so
		 * check to see if it's still the head of the list
		 * now that we're behind the lock that protects
		 * adding and removing from the queue...
		 * since we only ever release and steal units from
		 * that position, if it's no longer the head
		 * we have nothing to do in this context
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		/* Push the unit onto the free list for reuse. */
		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
		kd_ctrl_page->kds_free_list = kdsp;

		kd_ctrl_page->kds_inuse_count--;
	}

	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}
412
413
414 /*
415 * Disable wrapping and return true if trace wrapped, false otherwise.
416 */
417 bool
disable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t * old_slowcheck,uint32_t * old_flags)418 disable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t *old_slowcheck, uint32_t *old_flags)
419 {
420 bool wrapped;
421 int intrs_en = kdebug_storage_lock(kd_ctrl_page);
422
423 *old_slowcheck = kd_ctrl_page->kdebug_slowcheck;
424 *old_flags = kd_ctrl_page->kdebug_flags;
425
426 wrapped = kd_ctrl_page->kdebug_flags & KDBG_WRAPPED;
427 kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;
428 kd_ctrl_page->kdebug_flags |= KDBG_NOWRAP;
429
430 kdebug_storage_unlock(kd_ctrl_page, intrs_en);
431
432 return wrapped;
433 }
434
435 void
enable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t old_slowcheck)436 enable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t old_slowcheck)
437 {
438 int intrs_en = kdebug_storage_lock(kd_ctrl_page);
439
440 kd_ctrl_page->kdebug_flags &= ~KDBG_NOWRAP;
441
442 if (!(old_slowcheck & SLOW_NOLOG)) {
443 kd_ctrl_page->kdebug_slowcheck &= ~SLOW_NOLOG;
444 }
445 kdebug_storage_unlock(kd_ctrl_page, intrs_en);
446 }
447
/*
 * Record a single event described by `kd_rec` into the per-CPU storage of
 * this control page.  Preemption is disabled for the duration and the record
 * slot is claimed with a compare-and-swap, so no lock is held while the
 * record is filled in (allocate_storage_unit() takes the lock internally
 * when a new unit is needed).
 */
__attribute__((always_inline))
void
kernel_debug_write(struct kd_ctrl_page_t *kd_ctrl_page,
    struct kd_data_page_t *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	/* kd_rec.cpu == -1 means "use the CPU we are running on". */
	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdbip[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	/*
	 * kd_rec.timestamp == -1 means "stamp it now"; otherwise convert the
	 * caller-supplied timestamp into the clock domain currently in use.
	 */
	if (kd_rec.timestamp != -1) {
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}

		/* Drop events that pre-date what has already been merged out. */
		if (kd_rec.timestamp < kd_ctrl_page->oldest_time) {
			goto out;
		}

		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		/* Force the allocation path below. */
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * this can only happen if wrapping
			 * has been disabled
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and inits the buffer
		 * And, Intel uses a special 0 value as a early tracing timestamp sentinel
		 * to set the start of trace-time-start-of-interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		/* TRIAGE mode always stamps with the continuous clock. */
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	/* Claim slot `bindx`; on contention, re-read the tail and retry. */
	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdebug_flags & KDBG_DEBUGID_64) {
		/*DebugID has been passed in arg 4*/
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5; /* thread ID */

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	/* Publish the record: readers gate on kds_bufcnt, not kds_bufindx. */
	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}
562
563 /*
564 * This is a general purpose routine to read the kd_bufs from the storage units.
565 * So it is the responsibility of the caller to make sure they have acquired
566 * the right synchronization primitive so they can retrieve the data in the correct
567 * way.
568 */
/*
 * Merge events from all per-CPU storage units, in timestamp order, into the
 * scratch buffer and then out to the destination: in TRACE mode via
 * kernel_debug_trace_write_to_file() (file or user buffer), in TRIAGE mode
 * via memcpy to `buffer` (assumed to be a kernel buffer -- see the TRIAGE
 * branch below).  `*number` is the destination size in bytes on entry and
 * the number of events emitted on return.  Returns 0 or EINVAL.
 */
int
kernel_debug_read(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	uint32_t old_kdebug_flags;
	uint32_t old_kdebug_slowcheck;
	bool out_of_events = false;
	bool wrapped = false;
	bool set_preempt = true;
	bool should_disable = false;

	struct kd_bufinfo *kdbip = kd_data_page->kdbip;
	struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	/* Nothing to read without initialized buffers and a scratch buffer. */
	if (count == 0 || !(kd_ctrl_page->kdebug_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
		return EINVAL;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS'
		 * and that can be handled by a callout thread that already
		 * has the eager-preemption set.
		 * So check to see if we are dealing with one such thread.
		 */
		set_preempt = !(thread_is_eager_preempt(current_thread()));
	}

	if (set_preempt) {
		thread_set_eager_preempt(current_thread());
	}

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time. Only sort events that have occured
	 * before now. Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
	} else {
		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdebug_flags. The code that emits new events could be running,
	 * but it grabs kds_spin_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdebug_flags. If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = disable_wrap(kd_ctrl_page, &old_kdebug_slowcheck, &old_kdebug_flags);

	if (count > kd_data_page->nkdbufs) {
		count = kd_data_page->nkdbufs;
	}

	/* Cap each merge pass at the scratch buffer's capacity. */
	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * In TRIAGE mode we want to extract all the current
		 * records regardless of where we stopped reading last
		 * time so that we have the best shot at getting older
		 * records for threads before the buffers are wrapped.
		 * So set readlast to 0 to initiate the search from the
		 * 1st record.
		 */
		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_readlast = 0;
		}
	}

	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page->oldest_time;

	/* Outer loop: one scratch-buffer's worth of merged events per pass. */
	while (count) {
		tempbuf = kd_data_page->kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted. A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
					/*
					 * TRIAGE mode record keeping doesn't (currently)
					 * use lostevent markers. It also doesn't want to
					 * call release_storage_unit() in this read call.
					 * It expects the buffers to wrap and records reclaimed
					 * in that way solely.
					 */
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
					goto skip_record_checks;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost. This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page->oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page->oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				/* Ignore events stamped after this read began. */
				if (t > barrier_max) {
					if (kdbg_debug) {
						printf("kdebug: FUTURE EVENT: debugid %#8x: "
						    "time %lld from CPU %u "
						    "(barrier at time %lld, read %lu events)\n",
						    kdsp_actual->kds_records[rcursor].debugid,
						    t, cpu, barrier_max, *number + tempbuf_number);
					}
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using). The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the up-take.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' events.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdbg_debug) {
						printf("kdebug: PAST EVENT: debugid %#8x: "
						    "time %lld from CPU %u "
						    "(barrier at time %lld)\n",
						    kdsp_actual->kds_records[rcursor].debugid,
						    t, cpu, barrier_min);
					}

					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}
skip_record_checks:
				/* Track the earliest candidate event across CPUs. */
				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 1;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
			kd_buf *earliest_event = tempbuf;
			/* Check for the "disable on matching event" trigger. */
			if (kd_ctrl_page_trace.kdebug_flags & KDBG_MATCH_DISABLE) {
				kd_event_matcher *match = &kd_ctrl_page_trace.disable_event_match;
				kd_event_matcher *mask = &kd_ctrl_page_trace.disable_event_mask;
				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
					should_disable = true;
				}
			}

			/* TRACE mode recycles fully-consumed storage units eagerly. */
			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
				}
			}

			/*
			 * Watch for out of order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			/* In TRACE mode, flush to disk once enough bytes accumulate. */
			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE && (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}

		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page->oldest_time < latest_time) {
				kd_ctrl_page->oldest_time = latest_time;
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				error = kernel_debug_trace_write_to_file(&buffer, number, &count, tempbuf_number, vp, ctx, file_version);
				if (error) {
					goto check_error;
				}
			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
				/* TRIAGE destination is a kernel buffer -- plain memcpy. */
				memcpy((void*)buffer, kd_data_page->kdcopybuf, tempbuf_number * sizeof(kd_buf));

				buffer += (tempbuf_number * sizeof(kd_buf));
			} else {
				panic("kernel_debug_read: Invalid Mode");
			}
check_error:
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events) {
			/*
			 * all trace buffers are empty
			 */
			break;
		}

		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
		}
	}
	/* Restore wrapping unless the caller had it disabled already. */
	if (!(old_kdebug_flags & KDBG_NOWRAP)) {
		enable_wrap(kd_ctrl_page, old_kdebug_slowcheck);
	}

	if (set_preempt) {
		thread_clear_eager_preempt(current_thread());
	}

	if (should_disable) {
		kernel_debug_disable();
	}

	return error;
}
978