1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/kdebug_common.h>
24
25 LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
26 int kdbg_debug = 0;
27
28 extern struct kd_ctrl_page_t kd_ctrl_page_trace, kd_ctrl_page_triage;
29
30 void
kdebug_lck_init(void)31 kdebug_lck_init(void)
32 {
33 lck_spin_init(&kd_ctrl_page_trace.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
34 lck_spin_init(&kd_ctrl_page_triage.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
35 }
36
37 inline int
_storage_lock(struct kd_ctrl_page_t * kd_ctrl_page)38 _storage_lock(struct kd_ctrl_page_t *kd_ctrl_page)
39 {
40 int intrs_en = ml_set_interrupts_enabled(false);
41 lck_spin_lock_grp(&kd_ctrl_page->kds_spin_lock, &kdebug_lck_grp);
42 return intrs_en;
43 }
44
45 inline void
_storage_unlock(struct kd_ctrl_page_t * kd_ctrl_page,int intrs_en)46 _storage_unlock(struct kd_ctrl_page_t *kd_ctrl_page, int intrs_en)
47 {
48 lck_spin_unlock(&kd_ctrl_page->kds_spin_lock);
49 ml_set_interrupts_enabled(intrs_en);
50 }
51
52
53 extern int kernel_debug_trace_write_to_file(user_addr_t *buffer, size_t *number, size_t *count, size_t tempbuf_number, vnode_t vp, vfs_context_t ctx, uint32_t file_version);
54
/*
 * Return the number of per-CPU buffers kdebug should size for.
 *
 * `early_trace` indicates boot-time initialization, before the full CPU
 * topology is available; in that case a boot-safe count is used instead
 * of querying host info.  The non-early ARM path returns max_cpu_id + 1
 * (one slot per possible CPU ID), which may exceed the active CPU count.
 */
uint32_t
kdbg_cpu_count(bool early_trace)
{
	if (early_trace) {
#if defined(__x86_64__)
		/* Intel boot path: use the static maximum CPU count. */
		return max_ncpus;
#else /* defined(__x86_64__) */
		return ml_get_cpu_count();
#endif /* !defined(__x86_64__) */
	}

#if defined(__x86_64__)
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	/* (host_t)1 is the BSD_HOST special port value. */
	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	assert(hinfo.logical_cpu_max > 0);
	return hinfo.logical_cpu_max;
#else /* defined(__x86_64__) */
	return ml_get_topology_info()->max_cpu_id + 1;
#endif /* !defined(__x86_64__) */
}
76
77 /*
78 * Both kdebug_timestamp and kdebug_using_continuous_time are known
79 * to kexts. And going forward we always want to use mach_continuous_time().
80 * So we keep these 2 routines as-is to keep the TRACE mode use outside
81 * the kernel intact. TRIAGE mode will explicitly only use mach_continuous_time()
82 * for its timestamp.
83 */
84 bool
kdebug_using_continuous_time(void)85 kdebug_using_continuous_time(void)
86 {
87 return kd_ctrl_page_trace.kdebug_flags & KDBG_CONTINUOUS_TIME;
88 }
89
90 uint64_t
kdebug_timestamp(void)91 kdebug_timestamp(void)
92 {
93 if (kdebug_using_continuous_time()) {
94 return mach_continuous_time();
95 } else {
96 return mach_absolute_time();
97 }
98 }
99
100
/*
 * Allocate and initialize all event storage for one kdebug mode:
 *   - per-CPU bookkeeping array (kdbip),
 *   - the reader's bounce buffer (kdcopybuf),
 *   - the array of storage-buffer descriptors (kd_bufs) and the storage
 *     buffers themselves, carved into storage units that are threaded
 *     onto the control page's free list.
 *
 * Returns 0 on success or ENOSPC on any allocation failure; on failure
 * everything allocated so far is torn down via delete_buffers().
 */
int
create_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;
	vm_size_t kd_buf_size;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_storage_buffers *kd_bufs = NULL;
	int n_storage_units = kd_data_page->n_storage_units;

	ncpus = kd_ctrl_page->kdebug_cpus;

	/* Per-CPU bookkeeping structures, one kd_bufinfo per CPU. */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdbip, sizeof(struct kd_bufinfo) * ncpus, tag) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}
	kdbip = kd_data_page->kdbip;

	/*
	 * Split the requested storage units into full-size buffers plus at
	 * most one partial buffer for the remainder.
	 */
	f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->n_storage_buffer = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->n_storage_buffer++;
	}

	/* The copy buffer may survive a previous teardown; only allocate once. */
	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf, (vm_size_t) kd_ctrl_page->kdebug_kdcopybuf_size, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	/* Descriptor array: one kd_storage_buffers entry per storage buffer. */
	kd_buf_size = (vm_size_t) (kd_data_page->n_storage_buffer * sizeof(struct kd_storage_buffers));
	if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kd_bufs, kd_buf_size, tag) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}
	kd_bufs = kd_data_page->kd_bufs;
	bzero(kd_bufs, kd_buf_size);

	/* Full-size storage buffers. */
	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		bzero(kd_bufs[i].kdsb_addr, f_buffer_size);

		kd_bufs[i].kdsb_size = f_buffer_size;
	}
	/* Trailing partial buffer, if the unit count wasn't a whole multiple. */
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		bzero(kd_bufs[i].kdsb_addr, p_buffer_size);

		kd_bufs[i].kdsb_size = p_buffer_size;
	}

	/*
	 * Thread every storage unit in every buffer onto the control page's
	 * free list (LIFO: each unit points at the previous list head).
	 */
	count_storage_units = 0;
	for (i = 0; i < kd_data_page->n_storage_buffer; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		/* buffer_index/offset in kds_ptr are 16-bit; guard the packing. */
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdsb_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdsb_addr;

		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	/* Record the number of units actually created (may differ from request). */
	kd_data_page->n_storage_units = count_storage_units;

	bzero((char *)kdbip, sizeof(struct kd_bufinfo) * ncpus);

	/* Start each CPU with an empty storage-unit list. */
	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdebug_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kds_inuse_count = 0;
out:
	if (error) {
		/* Partial allocations are released; delete_buffers tolerates NULLs. */
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}
210
/*
 * Tear down everything create_buffers() set up: the storage buffers, the
 * descriptor array, the copy buffer, and the per-CPU bookkeeping array.
 * Safe to call on a partially-constructed kd_data_page (NULL fields are
 * skipped), which is how create_buffers() unwinds on failure.  Clears
 * KDBG_BUFINIT so the buffers are known to be gone.
 */
void
delete_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page)
{
	unsigned int i;
	int n_storage_buffer = kd_data_page->n_storage_buffer;

	struct kd_bufinfo *kdbip = kd_data_page->kdbip;
	struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		/* Free each storage buffer, then the descriptor array itself. */
		for (i = 0; i < n_storage_buffer; i++) {
			if (kd_bufs[i].kdsb_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
			}
		}
		kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffer * sizeof(struct kd_storage_buffers)));

		kd_data_page->kd_bufs = NULL;
		kd_data_page->n_storage_buffer = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	/* The free list pointed into the buffers just freed; invalidate it. */
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page->kdebug_cpus);

		kd_data_page->kdbip = NULL;
	}
	kd_ctrl_page->kdebug_iops = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->kdebug_flags &= ~KDBG_BUFINIT;
}
247
248 bool
allocate_storage_unit(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,int cpu)249 allocate_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu)
250 {
251 union kds_ptr kdsp;
252 struct kd_storage *kdsp_actual, *kdsp_next_actual;
253 struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
254 uint64_t oldest_ts, ts;
255 bool retval = true;
256 struct kd_storage_buffers *kd_bufs;
257
258 int intrs_en = _storage_lock(kd_ctrl_page);
259
260 kdbp = &kd_data_page->kdbip[cpu];
261 kd_bufs = kd_data_page->kd_bufs;
262 kdbip = kd_data_page->kdbip;
263
264 /* If someone beat us to the allocate, return success */
265 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
266 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);
267
268 if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
269 goto out;
270 }
271 }
272
273 if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
274 /*
275 * If there's a free page, grab it from the free list.
276 */
277 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
278 kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;
279
280 kd_ctrl_page->kds_inuse_count++;
281 } else {
282 /*
283 * Otherwise, we're going to lose events and repurpose the oldest
284 * storage unit we can find.
285 */
286 if (kd_ctrl_page->kdebug_flags & KDBG_NOWRAP) {
287 kd_ctrl_page->kdebug_slowcheck |= SLOW_NOLOG;
288 kdbp->kd_lostevents = true;
289 retval = false;
290 goto out;
291 }
292 kdbp_vict = NULL;
293 oldest_ts = UINT64_MAX;
294
295 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
296 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
297 /*
298 * no storage unit to steal
299 */
300 continue;
301 }
302
303 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);
304
305 if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
306 /*
307 * make sure we don't steal the storage unit
308 * being actively recorded to... need to
309 * move on because we don't want an out-of-order
310 * set of events showing up later
311 */
312 continue;
313 }
314
315 /*
316 * When wrapping, steal the storage unit with the
317 * earliest timestamp on its last event, instead of the
318 * earliest timestamp on the first event. This allows a
319 * storage unit with more recent events to be preserved,
320 * even if the storage unit contains events that are
321 * older than those found in other CPUs.
322 */
323 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
324 if (ts < oldest_ts) {
325 oldest_ts = ts;
326 kdbp_vict = kdbp_try;
327 }
328 }
329 if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
330 kdebug_enable = 0;
331 kd_ctrl_page->enabled = 0;
332 commpage_update_kdebug_state();
333 retval = false;
334 goto out;
335 }
336 kdsp = kdbp_vict->kd_list_head;
337 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
338 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
339
340 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
341 kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
342 kdsp_next_actual->kds_lostevents = true;
343 } else {
344 kdbp_vict->kd_lostevents = true;
345 }
346
347 if (kd_ctrl_page->oldest_time < oldest_ts) {
348 kd_ctrl_page->oldest_time = oldest_ts;
349 }
350 kd_ctrl_page->kdebug_flags |= KDBG_WRAPPED;
351 }
352
353 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
354 kdsp_actual->kds_timestamp = kdebug_timestamp();
355 } else {
356 kdsp_actual->kds_timestamp = mach_continuous_time();
357 }
358
359 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
360 kdsp_actual->kds_bufcnt = 0;
361 kdsp_actual->kds_readlast = 0;
362
363 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
364 kdbp->kd_lostevents = false;
365 kdsp_actual->kds_bufindx = 0;
366
367 if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
368 kdbp->kd_list_head = kdsp;
369 } else {
370 POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
371 }
372 kdbp->kd_list_tail = kdsp;
373 out:
374 _storage_unlock(kd_ctrl_page, intrs_en);
375
376 return retval;
377 }
378
379 void
release_storage_unit(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,int cpu,uint32_t kdsp_raw)380 release_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu, uint32_t kdsp_raw)
381 {
382 struct kd_storage *kdsp_actual;
383 struct kd_bufinfo *kdbp;
384 union kds_ptr kdsp;
385
386 kdbp = &kd_data_page->kdbip[cpu];
387
388 kdsp.raw = kdsp_raw;
389
390 int intrs_en = _storage_lock(kd_ctrl_page);
391
392 if (kdsp.raw == kdbp->kd_list_head.raw) {
393 /*
394 * it's possible for the storage unit pointed to
395 * by kdsp to have already been stolen... so
396 * check to see if it's still the head of the list
397 * now that we're behind the lock that protects
398 * adding and removing from the queue...
399 * since we only ever release and steal units from
400 * that position, if it's no longer the head
401 * we having nothing to do in this context
402 */
403 kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
404 kdbp->kd_list_head = kdsp_actual->kds_next;
405
406 kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
407 kd_ctrl_page->kds_free_list = kdsp;
408
409 kd_ctrl_page->kds_inuse_count--;
410 }
411
412 _storage_unlock(kd_ctrl_page, intrs_en);
413 }
414
415
416 /*
417 * Disable wrapping and return true if trace wrapped, false otherwise.
418 */
419 bool
disable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t * old_slowcheck,uint32_t * old_flags)420 disable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t *old_slowcheck, uint32_t *old_flags)
421 {
422 bool wrapped;
423 int intrs_en = _storage_lock(kd_ctrl_page);
424
425 *old_slowcheck = kd_ctrl_page->kdebug_slowcheck;
426 *old_flags = kd_ctrl_page->kdebug_flags;
427
428 wrapped = kd_ctrl_page->kdebug_flags & KDBG_WRAPPED;
429 kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;
430 kd_ctrl_page->kdebug_flags |= KDBG_NOWRAP;
431
432 _storage_unlock(kd_ctrl_page, intrs_en);
433
434 return wrapped;
435 }
436
437 void
enable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t old_slowcheck)438 enable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t old_slowcheck)
439 {
440 int intrs_en = _storage_lock(kd_ctrl_page);
441
442 kd_ctrl_page->kdebug_flags &= ~KDBG_NOWRAP;
443
444 if (!(old_slowcheck & SLOW_NOLOG)) {
445 kd_ctrl_page->kdebug_slowcheck &= ~SLOW_NOLOG;
446 }
447 _storage_unlock(kd_ctrl_page, intrs_en);
448 }
449
/*
 * Record one event into the per-CPU storage for the given mode.
 *
 * Runs with preemption disabled and takes no locks on the fast path: the
 * slot index is claimed with a compare-and-swap on kds_bufindx, retrying
 * from the list tail whenever the CAS loses or the unit fills.  A
 * kd_rec.cpu of -1 means "the current CPU"; a kd_rec.timestamp of -1
 * means "stamp it now".  Explicit timestamps are converted between
 * absolute and continuous time as needed and dropped entirely if they
 * predate oldest_time (events that old have already been overwritten).
 */
__attribute__((always_inline))
void
kernel_debug_write(struct kd_ctrl_page_t *kd_ctrl_page,
    struct kd_data_page_t *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	/* -1 requests the current CPU; otherwise trust the caller (e.g. IOPs). */
	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdbip[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	if (kd_rec.timestamp != -1) {
		/*
		 * Convert the caller-supplied timestamp to whichever timebase
		 * the trace session is using, if it isn't already in it.
		 */
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}

		/* Too old to merge -- buffers covering that era were reclaimed. */
		if (kd_rec.timestamp < kd_ctrl_page->oldest_time) {
			goto out;
		}

		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		/* Force the allocation path below. */
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * this can only happen if wrapping
			 * has been disabled
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and inits the buffer
		 * And, Intel uses a special 0 value as a early tracing timestamp sentinel
		 * to set the start of trace-time-start-of-interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		/* TRIAGE mode always stamps with continuous time. */
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	/* Claim slot bindx; on a lost race, re-resolve the tail and retry. */
	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdebug_flags & KDBG_DEBUGID_64) {
		/*DebugID has been passed in arg 4*/
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5; /* thread ID */

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	/* Publish the completed record; readers compare bufcnt vs bufindx. */
	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}
564
565 /*
566 * This is a general purpose routine to read the kd_bufs from the storage units.
567 * So it is the responsibility of the caller to make sure they have acquired
568 * the right synchronization primitive so they can retrieve the data in the correct
569 * way.
570 */
571 int
kernel_debug_read(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,user_addr_t buffer,size_t * number,vnode_t vp,vfs_context_t ctx,uint32_t file_version)572 kernel_debug_read(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
573 {
574 size_t count;
575 unsigned int cpu, min_cpu;
576 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
577 int error = 0;
578 kd_buf *tempbuf;
579 uint32_t rcursor;
580 kd_buf lostevent;
581 union kds_ptr kdsp;
582 bool traced_retrograde = false;
583 struct kd_storage *kdsp_actual;
584 struct kd_bufinfo *kdbp;
585 struct kd_bufinfo *min_kdbp;
586 size_t tempbuf_count;
587 uint32_t tempbuf_number;
588 uint32_t old_kdebug_flags;
589 uint32_t old_kdebug_slowcheck;
590 bool out_of_events = false;
591 bool wrapped = false, set_preempt = true;
592
593 struct kd_bufinfo *kdbip = kd_data_page->kdbip;
594 struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;
595
596 assert(number != NULL);
597 count = *number / sizeof(kd_buf);
598 *number = 0;
599
600 if (count == 0 || !(kd_ctrl_page->kdebug_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
601 return EINVAL;
602 }
603
604 if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
605 /*
606 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS'
607 * and that can be handled by a callout thread that already
608 * has the eager-preemption set.
609 * So check to see if we are dealing with one such thread.
610 */
611 set_preempt = !(thread_is_eager_preempt(current_thread()));
612 }
613
614 if (set_preempt) {
615 thread_set_eager_preempt(current_thread());
616 }
617
618 memset(&lostevent, 0, sizeof(lostevent));
619 lostevent.debugid = TRACE_LOST_EVENTS;
620
621 /*
622 * Capture the current time. Only sort events that have occured
623 * before now. Since the IOPs are being flushed here, it is possible
624 * that events occur on the AP while running live tracing.
625 */
626 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
627 barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
628 } else {
629 barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
630 }
631
632 /*
633 * Disable wrap so storage units cannot be stolen out from underneath us
634 * while merging events.
635 *
636 * Because we hold ktrace_lock, no other control threads can be playing
637 * with kdebug_flags. The code that emits new events could be running,
638 * but it grabs kds_spin_lock if it needs to acquire a new storage
639 * chunk, which is where it examines kdebug_flags. If it is adding to
640 * the same chunk we're reading from, check for that below.
641 */
642 wrapped = disable_wrap(kd_ctrl_page, &old_kdebug_slowcheck, &old_kdebug_flags);
643
644 if (count > kd_data_page->nkdbufs) {
645 count = kd_data_page->nkdbufs;
646 }
647
648 if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
649 tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
650 }
651
652 /*
653 * If the buffers have wrapped, do not emit additional lost events for the
654 * oldest storage units.
655 */
656 if (wrapped) {
657 kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;
658
659 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
660 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
661 continue;
662 }
663 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
664 kdsp_actual->kds_lostevents = false;
665 }
666 }
667
668 if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
669 /*
670 * In TRIAGE mode we want to extract all the current
671 * records regardless of where we stopped reading last
672 * time so that we have the best shot at getting older
673 * records for threads before the buffers are wrapped.
674 * So set readlast to 0 to initiate the search from the
675 * 1st record.
676 */
677 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
678 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
679 continue;
680 }
681 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
682 kdsp_actual->kds_readlast = 0;
683 }
684 }
685
686 /*
687 * Capture the earliest time where there are events for all CPUs and don't
688 * emit events with timestamps prior.
689 */
690 barrier_min = kd_ctrl_page->oldest_time;
691
692 while (count) {
693 tempbuf = kd_data_page->kdcopybuf;
694 tempbuf_number = 0;
695
696 if (wrapped) {
697 /*
698 * Emit a lost events tracepoint to indicate that previous events
699 * were lost -- the thread map cannot be trusted. A new one must
700 * be taken so tools can analyze the trace in a backwards-facing
701 * fashion.
702 */
703 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
704 *tempbuf = lostevent;
705 wrapped = false;
706 goto nextevent;
707 }
708
709 /* While space left in merged events scratch buffer. */
710 while (tempbuf_count) {
711 bool lostevents = false;
712 int lostcpu = 0;
713 earliest_time = UINT64_MAX;
714 min_kdbp = NULL;
715 min_cpu = 0;
716
717 /* Check each CPU's buffers for the earliest event. */
718 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
719 /* Skip CPUs without data in their oldest storage unit. */
720 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
721 next_cpu:
722 continue;
723 }
724 /* From CPU data to buffer header to buffer. */
725 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
726
727 next_event:
728 /* The next event to be read from this buffer. */
729 rcursor = kdsp_actual->kds_readlast;
730
731 /* Skip this buffer if there are no events left. */
732 if (rcursor == kdsp_actual->kds_bufindx) {
733 continue;
734 }
735
736 if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
737 /*
738 * TRIAGE mode record keeping doesn't (currently)
739 * use lostevent markers. It also doesn't want to
740 * call release_storage_unit() in this read call.
741 * It expects the buffers to wrap and records reclaimed
742 * in that way solely.
743 */
744 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
745 goto skip_record_checks;
746 }
747
748 /*
749 * Check that this storage unit wasn't stolen and events were
750 * lost. This must have happened while wrapping was disabled
751 * in this function.
752 */
753 if (kdsp_actual->kds_lostevents) {
754 lostevents = true;
755 kdsp_actual->kds_lostevents = false;
756
757 /*
758 * The earliest event we can trust is the first one in this
759 * stolen storage unit.
760 */
761 uint64_t lost_time =
762 kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
763 if (kd_ctrl_page->oldest_time < lost_time) {
764 /*
765 * If this is the first time we've seen lost events for
766 * this gap, record its timestamp as the oldest
767 * timestamp we're willing to merge for the lost events
768 * tracepoint.
769 */
770 kd_ctrl_page->oldest_time = barrier_min = lost_time;
771 lostcpu = cpu;
772 }
773 }
774
775 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
776
777 if (t > barrier_max) {
778 if (kdbg_debug) {
779 printf("kdebug: FUTURE EVENT: debugid %#8x: "
780 "time %lld from CPU %u "
781 "(barrier at time %lld, read %lu events)\n",
782 kdsp_actual->kds_records[rcursor].debugid,
783 t, cpu, barrier_max, *number + tempbuf_number);
784 }
785 goto next_cpu;
786 }
787 if (t < kdsp_actual->kds_timestamp) {
788 /*
789 * This indicates the event emitter hasn't completed
790 * filling in the event (becuase we're looking at the
791 * buffer that the record head is using). The max barrier
792 * timestamp should have saved us from seeing these kinds
793 * of things, but other CPUs might be slow on the up-take.
794 *
795 * Bail out so we don't get out-of-order events by
796 * continuing to read events from other CPUs' events.
797 */
798 out_of_events = true;
799 break;
800 }
801
802 /*
803 * Ignore events that have aged out due to wrapping or storage
804 * unit exhaustion while merging events.
805 */
806 if (t < barrier_min) {
807 kdsp_actual->kds_readlast++;
808 if (kdbg_debug) {
809 printf("kdebug: PAST EVENT: debugid %#8x: "
810 "time %lld from CPU %u "
811 "(barrier at time %lld)\n",
812 kdsp_actual->kds_records[rcursor].debugid,
813 t, cpu, barrier_min);
814 }
815
816 if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
817 release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);
818
819 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
820 goto next_cpu;
821 }
822 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
823 }
824 goto next_event;
825 }
826
827 /*
828 * Don't worry about merging any events -- just walk through
829 * the CPUs and find the latest timestamp of lost events.
830 */
831 if (lostevents) {
832 continue;
833 }
834 skip_record_checks:
835 if (t < earliest_time) {
836 earliest_time = t;
837 min_kdbp = kdbp;
838 min_cpu = cpu;
839 }
840 }
841 if (lostevents) {
842 /*
843 * If any lost events were hit in the buffers, emit an event
844 * with the latest timestamp.
845 */
846 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
847 *tempbuf = lostevent;
848 tempbuf->arg1 = 1;
849 goto nextevent;
850 }
851 if (min_kdbp == NULL) {
852 /* All buffers ran empty. */
853 out_of_events = true;
854 }
855 if (out_of_events) {
856 break;
857 }
858
859 kdsp = min_kdbp->kd_list_head;
860 kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
861
862 /* Copy earliest event into merged events scratch buffer. */
863 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
864
865 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
866 if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
867 release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
868 }
869 }
870
871 /*
872 * Watch for out of order timestamps (from IOPs).
873 */
874 if (earliest_time < min_kdbp->kd_prev_timebase) {
875 /*
876 * If we haven't already, emit a retrograde events event.
877 * Otherwise, ignore this event.
878 */
879 if (traced_retrograde) {
880 continue;
881 }
882 if (kdbg_debug) {
883 printf("kdebug: RETRO EVENT: debugid %#8x: "
884 "time %lld from CPU %u "
885 "(barrier at time %lld)\n",
886 kdsp_actual->kds_records[rcursor].debugid,
887 t, cpu, barrier_min);
888 }
889
890 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
891 tempbuf->arg1 = tempbuf->debugid;
892 tempbuf->arg2 = (kd_buf_argtype)earliest_time;
893 tempbuf->arg3 = 0;
894 tempbuf->arg4 = 0;
895 tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
896 traced_retrograde = true;
897 } else {
898 min_kdbp->kd_prev_timebase = earliest_time;
899 }
900 nextevent:
901 tempbuf_count--;
902 tempbuf_number++;
903 tempbuf++;
904
905 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE && (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
906 break;
907 }
908 }
909
910 if (tempbuf_number) {
911 /*
912 * Remember the latest timestamp of events that we've merged so we
913 * don't think we've lost events later.
914 */
915 uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
916 if (kd_ctrl_page->oldest_time < latest_time) {
917 kd_ctrl_page->oldest_time = latest_time;
918 }
919
920 if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
921 error = kernel_debug_trace_write_to_file(&buffer, number, &count, tempbuf_number, vp, ctx, file_version);
922 if (error) {
923 goto check_error;
924 }
925 } else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
926 memcpy((void*)buffer, kd_data_page->kdcopybuf, tempbuf_number * sizeof(kd_buf));
927
928 buffer += (tempbuf_number * sizeof(kd_buf));
929 } else {
930 panic("kernel_debug_read: Invalid Mode");
931 }
932 check_error:
933 if (error) {
934 *number = 0;
935 error = EINVAL;
936 break;
937 }
938 count -= tempbuf_number;
939 *number += tempbuf_number;
940 }
941 if (out_of_events == true) {
942 /*
943 * all trace buffers are empty
944 */
945 break;
946 }
947
948 if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
949 tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
950 }
951 }
952 if (!(old_kdebug_flags & KDBG_NOWRAP)) {
953 enable_wrap(kd_ctrl_page, old_kdebug_slowcheck);
954 }
955
956 if (set_preempt) {
957 thread_clear_eager_preempt(current_thread());
958 }
959
960 return error;
961 }
962