/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @Apple_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/kdebug_common.h>

LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
int kdbg_debug = 0;

extern struct kd_control kd_control_trace, kd_control_triage;

int
kdebug_storage_lock(struct kd_control *kd_ctrl_page)
{
	int intrs_en = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(&kd_ctrl_page->kdc_storage_lock, &kdebug_lck_grp);
	return intrs_en;
}

void
kdebug_storage_unlock(struct kd_control *kd_ctrl_page, int intrs_en)
{
	lck_spin_unlock(&kd_ctrl_page->kdc_storage_lock);
	ml_set_interrupts_enabled(intrs_en);
}
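
/*
 * Typical usage, mirroring allocate_storage_unit() below: callers bracket
 * free-list manipulation with these helpers, which also save and restore
 * the interrupt state across the critical section:
 *
 *	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
 *	// ... push or pop storage units on kds_free_list ...
 *	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
 */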

// Turn on boot tracing and set the number of events.
static TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
// Enable wrapping during boot tracing.
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);
// The filter description to apply to boot tracing.
static TUNABLE_STR(trace_typefilter, 256, "trace_typefilter", "");

// Turn on wake tracing and set the number of events.
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
// Write trace events to a file in the event of a panic.
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);

// Obsolete leak logging system.
TUNABLE(int, log_leaks, "-l", 0);
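
/*
 * Illustrative boot-args (the values here are hypothetical): booting with
 * "trace=1000000 trace_wrap=1" starts boot tracing sized for one million
 * events and allows the buffers to wrap, while "trace_wake=100000" arms
 * wake tracing with 100000 events instead.
 */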

void
kdebug_startup(void)
{
	lck_spin_init(&kd_control_trace.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	lck_spin_init(&kd_control_triage.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
	create_buffers_triage();
}

uint32_t
kdbg_cpu_count(void)
{
#if defined(__x86_64__)
	return ml_early_cpu_max_number() + 1;
#else // defined(__x86_64__)
	return ml_get_cpu_count();
#endif // !defined(__x86_64__)
}

/*
 * Both kdebug_timestamp and kdebug_using_continuous_time are known to
 * kexts, and going forward we always want to use mach_continuous_time().
 * So we keep these two routines as-is to keep TRACE mode use outside the
 * kernel intact.  TRIAGE mode will explicitly only use
 * mach_continuous_time() for its timestamps.
 */
bool
kdebug_using_continuous_time(void)
{
	return kd_control_trace.kdc_flags & KDBG_CONTINUOUS_TIME;
}

uint64_t
kdebug_timestamp(void)
{
	if (kdebug_using_continuous_time()) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

int
create_buffers(
	struct kd_control *kd_ctrl_page,
	struct kd_buffer *kd_data_page,
	vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_region *kd_bufs = NULL;
	int kdb_storage_count = kd_data_page->kdb_storage_count;

	ncpus = kd_ctrl_page->alloc_cpus;

	kdbip = kalloc_type_tag(struct kd_bufinfo, ncpus, Z_WAITOK | Z_ZERO, tag);
	if (kdbip == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdb_info = kdbip;

	f_buffers = kdb_storage_count / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->kdb_region_count = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (kdb_storage_count % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->kdb_region_count++;
	}
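
	/*
	 * Worked example: if N_STORAGE_UNITS_PER_BUFFER were 2048 and 5000
	 * storage units were requested, this would carve out two full regions
	 * of 2048 units plus one partial region of 904 units, for a
	 * kdb_region_count of 3.
	 */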

	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t) kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	kd_bufs = kalloc_type_tag(struct kd_region, kd_data_page->kdb_region_count,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = f_buffer_size;
	}
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = p_buffer_size;
	}

	count_storage_units = 0;
	for (i = 0; i < kd_data_page->kdb_region_count; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdr_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdr_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdr_addr;

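		/*
		 * Thread each storage unit in this region onto the head of the
		 * global free list, recording it as a (region index, offset)
		 * pair rather than a raw pointer.
		 */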
		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	kd_data_page->kdb_storage_count = count_storage_units;

	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdc_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kdc_storage_used = 0;
out:
	if (error) {
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}

void
delete_buffers(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page)
{
	unsigned int i;
	int kdb_region_count = kd_data_page->kdb_region_count;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		for (i = 0; i < kdb_region_count; i++) {
			if (kd_bufs[i].kdr_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdr_addr, (vm_size_t)kd_bufs[i].kdr_size);
			}
		}
		kfree_type(struct kd_region, kdb_region_count, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->kdb_region_count = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kfree_type(struct kd_bufinfo, kd_ctrl_page->alloc_cpus, kdbip);
		kd_data_page->kdb_info = NULL;
	}
	kd_ctrl_page->kdc_coprocs = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->alloc_cpus = 0;
	kd_ctrl_page->kdc_flags &= ~KDBG_BUFINIT;
}

static bool
allocate_storage_unit(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, int cpu)
{
	union kds_ptr kdsp;
	struct kd_storage *kdsp_actual, *kdsp_next_actual;
	struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
	uint64_t oldest_ts, ts;
	bool retval = true;
	struct kd_region *kd_bufs;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	kdbp = &kd_data_page->kdb_info[cpu];
	kd_bufs = kd_data_page->kd_bufs;
	kdbip = kd_data_page->kdb_info;

	/* If someone beat us to the allocation, return success. */
	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);

		if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
			goto out;
		}
	}

	if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
		/*
		 * If there's a free page, grab it from the free list.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;

		kd_ctrl_page->kdc_storage_used++;
	} else {
		/*
		 * Otherwise, we're going to lose events and repurpose the oldest
		 * storage unit we can find.
		 */
		if (kd_ctrl_page->kdc_live_flags & KDBG_NOWRAP) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			kdbp->kd_lostevents = true;
			retval = false;
			goto out;
		}
		kdbp_vict = NULL;
		oldest_ts = UINT64_MAX;

		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
				/*
				 * no storage unit to steal
				 */
				continue;
			}

			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);

			if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
				/*
				 * make sure we don't steal the storage unit
				 * being actively recorded to...  need to
				 * move on because we don't want an out-of-order
				 * set of events showing up later
				 */
				continue;
			}

			/*
			 * When wrapping, steal the storage unit with the
			 * earliest timestamp on its last event, instead of the
			 * earliest timestamp on the first event.  This allows a
			 * storage unit with more recent events to be preserved,
			 * even if the storage unit contains events that are
			 * older than those found in other CPUs.
			 */
			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
			if (ts < oldest_ts) {
				oldest_ts = ts;
				kdbp_vict = kdbp_try;
			}
		}
		if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			retval = false;
			goto out;
		}
		kdsp = kdbp_vict->kd_list_head;
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kdbp_vict->kd_list_head = kdsp_actual->kds_next;

		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
			kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
			kdsp_next_actual->kds_lostevents = true;
		} else {
			kdbp_vict->kd_lostevents = true;
		}

		if (kd_ctrl_page->kdc_oldest_time < oldest_ts) {
			kd_ctrl_page->kdc_oldest_time = oldest_ts;
		}
		kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		kdsp_actual->kds_timestamp = kdebug_timestamp();
	} else {
		kdsp_actual->kds_timestamp = mach_continuous_time();
	}

	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
	kdsp_actual->kds_bufcnt   = 0;
	kdsp_actual->kds_readlast = 0;

	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
	kdbp->kd_lostevents = false;
	kdsp_actual->kds_bufindx = 0;

	if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
		kdbp->kd_list_head = kdsp;
	} else {
		POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
	}
	kdbp->kd_list_tail = kdsp;
out:
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);

	return retval;
}

static void
release_storage_unit(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page, int cpu, uint32_t kdsp_raw)
{
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdbp = &kd_data_page->kdb_info[cpu];

	kdsp.raw = kdsp_raw;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * It's possible for the storage unit pointed to by kdsp to
		 * have already been stolen, so check whether it's still the
		 * head of the list now that we're behind the lock that
		 * protects adding to and removing from the queue.  Since we
		 * only ever release and steal units from that position, if
		 * it's no longer the head we have nothing to do in this
		 * context.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
		kd_ctrl_page->kds_free_list = kdsp;

		kd_ctrl_page->kdc_storage_used--;
	}

	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

bool
kdebug_disable_wrap(struct kd_control *ctl,
    kdebug_emit_filter_t *old_emit, kdebug_live_flags_t *old_live)
{
	int intrs_en = kdebug_storage_lock(ctl);

	*old_emit = ctl->kdc_emit;
	*old_live = ctl->kdc_live_flags;

	bool wrapped = ctl->kdc_live_flags & KDBG_WRAPPED;
	ctl->kdc_live_flags &= ~KDBG_WRAPPED;
	ctl->kdc_live_flags |= KDBG_NOWRAP;

	kdebug_storage_unlock(ctl, intrs_en);

	return wrapped;
}

static void
_enable_wrap(struct kd_control *kd_ctrl_page, kdebug_emit_filter_t emit)
{
	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
	kd_ctrl_page->kdc_live_flags &= ~KDBG_NOWRAP;
	if (emit) {
		kd_ctrl_page->kdc_emit = emit;
	}
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

__attribute__((always_inline))
void
kernel_debug_write(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdb_info[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	if (kd_rec.timestamp != -1) {
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}
		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
		if (kd_rec.timestamp < kd_ctrl_page->kdc_oldest_time) {
			if (kdbp->latest_past_event_timestamp < kd_rec.timestamp) {
				kdbp->latest_past_event_timestamp = kd_rec.timestamp;
			}
			goto out;
		}
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * This can only happen if wrapping
			 * has been disabled.
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and
		 * initializes the buffer.  And Intel uses a special 0 value
		 * as an early-tracing timestamp sentinel to mark the start
		 * of the time range of interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}
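	/*
	 * Reserve the slot lock-free: a racing writer (e.g. an interrupt taken
	 * on this CPU, since only preemption is disabled here) may have
	 * claimed index bindx first, in which case start over from the tail
	 * of this CPU's list.
	 */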
	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdc_flags & KDBG_DEBUGID_64) {
		/* The debugid has been passed in arg4. */
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5;

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}
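
/*
 * Illustrative call (a sketch only -- the buffer instance name below is an
 * assumption, not something defined in this file):
 *
 *	struct kd_record kd_rec = {
 *		.cpu = -1,        // -1: let the writer pick the current CPU
 *		.timestamp = -1,  // -1: let the writer take the timestamp
 *		.debugid = debugid,
 *		.arg1 = a, .arg2 = b, .arg3 = c, .arg4 = d, .arg5 = e,
 *	};
 *	kernel_debug_write(&kd_control_trace, &kd_buffer_trace, kd_rec);
 */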

// Read events from kdebug storage units into a user space buffer or file.
//
// This code runs while events are emitted -- storage unit allocation and
// deallocation will synchronize with the emitters.  Only one reader per
// control structure is allowed.
int
kernel_debug_read(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, user_addr_t buffer, size_t *number,
    vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	kdebug_emit_filter_t old_emit;
	uint32_t old_live_flags;
	bool out_of_events = false;
	bool wrapped = false;
	bool set_preempt = true;
	bool should_disable = false;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	if (count == 0 || !(kd_ctrl_page->kdc_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
		return EINVAL;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS'
		 * and can be handled by a callout thread that already has
		 * eager preemption set, so check whether we are dealing with
		 * one such thread.
		 */
		set_preempt = !(thread_is_eager_preempt(current_thread()));
	}

	if (set_preempt) {
		thread_set_eager_preempt(current_thread());
	}

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time.  Only sort events that have occurred
	 * before now.  Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
	} else {
		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdc_flags.  The code that emits new events could be running,
	 * but it grabs kdc_storage_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdc_flags.  If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = kdebug_disable_wrap(kd_ctrl_page, &old_emit, &old_live_flags);

	if (count > kd_data_page->kdb_event_count) {
		count = kd_data_page->kdb_event_count;
	}

	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page->kdc_live_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * In TRIAGE mode we want to extract all the current records
		 * regardless of where we stopped reading last time, so that
		 * we have the best shot at getting older records for threads
		 * before the buffers are wrapped.  So:
		 * a) set kd_prev_timebase to 0 so we (re-)consider older records
		 * b) set readlast to 0 to initiate the search from the first
		 *    record
		 */
		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			kdbp->kd_prev_timebase = 0;
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_readlast = 0;
		}
	}

	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page->kdc_oldest_time;

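	/*
	 * Merge loop: each pass scans the oldest storage unit of every CPU,
	 * copies out the event with the smallest timestamp, and repeats, so
	 * the combined stream is emitted in (mostly) time order.
	 */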
	while (count) {
		tempbuf = kd_data_page->kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted.  A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space is left in the merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
					/*
					 * TRIAGE mode record keeping doesn't (currently)
					 * use lostevent markers.  It also doesn't want to
					 * call release_storage_unit() in this read call;
					 * it expects the buffers to wrap and records to be
					 * reclaimed solely in that way.
					 */
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
					goto skip_record_checks;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost.  This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page->kdc_oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page->kdc_oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if (t > barrier_max) {
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using).  The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the uptake.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' buffers.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}
skip_record_checks:
				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 1;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

			if (min_kdbp->latest_past_event_timestamp != 0) {
				if (kdbg_debug) {
					printf("kdebug: PAST EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, earliest_time, min_cpu);
				tempbuf->arg1 = (kd_buf_argtype)min_kdbp->latest_past_event_timestamp;
				tempbuf->arg2 = 0;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_PAST_EVENTS;
				min_kdbp->latest_past_event_timestamp = 0;
				goto nextevent;
			}

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
			kd_buf *earliest_event = tempbuf;
			if (kd_control_trace.kdc_flags & KDBG_MATCH_DISABLE) {
				kd_event_matcher *match = &kd_control_trace.disable_event_match;
				kd_event_matcher *mask = &kd_control_trace.disable_event_mask;
				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
					should_disable = true;
				}
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
				}
			}

			/*
			 * Watch for out-of-order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase,
				    kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE &&
			    (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}

		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page->kdc_oldest_time < latest_time) {
				kd_ctrl_page->kdc_oldest_time = latest_time;
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				extern int kernel_debug_trace_write_to_file(user_addr_t *buffer,
				    size_t *number, size_t *count, size_t tempbuf_number,
				    vnode_t vp, vfs_context_t ctx, uint32_t file_version);
				error = kernel_debug_trace_write_to_file(&buffer, number,
				    &count, tempbuf_number, vp, ctx, file_version);
			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
				memcpy((void*)buffer, kd_data_page->kdcopybuf,
				    tempbuf_number * sizeof(kd_buf));
				buffer += tempbuf_number * sizeof(kd_buf);
			} else {
				panic("kdebug: invalid kdebug mode %d", kd_ctrl_page->mode);
			}
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count   -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events) {
			break;
		}

		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
		}
	}
	if ((old_live_flags & KDBG_NOWRAP) == 0) {
		_enable_wrap(kd_ctrl_page, old_emit);
	}

	if (set_preempt) {
		thread_clear_eager_preempt(current_thread());
	}

	if (should_disable) {
		kernel_debug_disable();
	}

	return error;
}