xref: /xnu-8020.140.41/bsd/kern/kdebug_common.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @Apple_LICENSE_HEADER_START@
5  *
6  * The contents of this file constitute Original Code as defined in and
7  * are subject to the Apple Public Source License Version 1.1 (the
8  * "License").  You may not use this file except in compliance with the
9  * License.  Please obtain a copy of the License at
10  * http://www.apple.com/publicsource and read it before using this file.
11  *
12  * This Original Code and all software distributed under the License are
13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
17  * License for the specific language governing rights and limitations
18  * under the License.
19  *
20  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21  */
22 
23 #include <sys/kdebug_common.h>
24 
25 LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
26 int kdbg_debug = 0;
27 
28 extern struct kd_ctrl_page_t kd_ctrl_page_trace, kd_ctrl_page_triage;
29 
30 void
kdebug_lck_init(void)31 kdebug_lck_init(void)
32 {
33 	lck_spin_init(&kd_ctrl_page_trace.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
34 	lck_spin_init(&kd_ctrl_page_triage.kds_spin_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
35 }
36 
37 int
kdebug_storage_lock(struct kd_ctrl_page_t * kd_ctrl_page)38 kdebug_storage_lock(struct kd_ctrl_page_t *kd_ctrl_page)
39 {
40 	int intrs_en = ml_set_interrupts_enabled(false);
41 	lck_spin_lock_grp(&kd_ctrl_page->kds_spin_lock, &kdebug_lck_grp);
42 	return intrs_en;
43 }
44 
45 void
kdebug_storage_unlock(struct kd_ctrl_page_t * kd_ctrl_page,int intrs_en)46 kdebug_storage_unlock(struct kd_ctrl_page_t *kd_ctrl_page, int intrs_en)
47 {
48 	lck_spin_unlock(&kd_ctrl_page->kds_spin_lock);
49 	ml_set_interrupts_enabled(intrs_en);
50 }
51 
52 extern int kernel_debug_trace_write_to_file(user_addr_t *buffer, size_t *number, size_t *count, size_t tempbuf_number, vnode_t vp, vfs_context_t ctx, uint32_t file_version);
53 
54 uint32_t
kdbg_cpu_count(bool early_trace)55 kdbg_cpu_count(bool early_trace)
56 {
57 	if (early_trace) {
58 #if defined(__x86_64__)
59 		return max_ncpus;
60 #else /* defined(__x86_64__) */
61 		return ml_get_cpu_count();
62 #endif /* !defined(__x86_64__) */
63 	}
64 
65 #if defined(__x86_64__)
66 	host_basic_info_data_t hinfo;
67 	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
68 	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
69 	assert(hinfo.logical_cpu_max > 0);
70 	return hinfo.logical_cpu_max;
71 #else /* defined(__x86_64__) */
72 	return ml_get_topology_info()->max_cpu_id + 1;
73 #endif /* !defined(__x86_64__) */
74 }
75 
76 /*
77  * Both kdebug_timestamp and kdebug_using_continuous_time are known
78  * to kexts. And going forward we always want to use mach_continuous_time().
79  * So we keep these 2 routines as-is to keep the TRACE mode use outside
80  * the kernel intact. TRIAGE mode will explicitly only use mach_continuous_time()
81  * for its timestamp.
82  */
83 bool
kdebug_using_continuous_time(void)84 kdebug_using_continuous_time(void)
85 {
86 	return kd_ctrl_page_trace.kdebug_flags & KDBG_CONTINUOUS_TIME;
87 }
88 
89 uint64_t
kdebug_timestamp(void)90 kdebug_timestamp(void)
91 {
92 	if (kdebug_using_continuous_time()) {
93 		return mach_continuous_time();
94 	} else {
95 		return mach_absolute_time();
96 	}
97 }
98 
/*
 * Allocate all buffers backing a kdebug session: the per-CPU bookkeeping
 * array (kdbip), the copy-out staging buffer (kdcopybuf), and the storage
 * buffers that are carved into storage units and threaded onto the control
 * page's free list.
 *
 * Returns 0 on success, or ENOSPC if any allocation fails; on failure,
 * everything allocated so far is torn down via delete_buffers().
 */
int
create_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_storage_buffers *kd_bufs = NULL;
	int n_storage_units = kd_data_page->n_storage_units;

	ncpus = kd_ctrl_page->kdebug_cpus;

	/* One kd_bufinfo per CPU tracks that CPU's chain of storage units. */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip,
	    sizeof(struct kd_bufinfo) * ncpus,
	    KMA_ZERO | KMA_DATA, tag) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdbip = kdbip;

	/*
	 * Storage units come in fixed-size buffers of
	 * N_STORAGE_UNITS_PER_BUFFER units each ("full" buffers), plus an
	 * optional trailing "partial" buffer for the remainder.
	 */
	f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->n_storage_buffer = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->n_storage_buffer++;
	}

	/* Staging buffer used by kernel_debug_read() when copying events out. */
	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t) kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	/* Descriptor array: one {address, size} entry per storage buffer. */
	kd_bufs = kalloc_type_tag(struct kd_storage_buffers, kd_data_page->n_storage_buffer,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	/* Allocate the full-size buffers first... */
	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdsb_size = f_buffer_size;
	}
	/* ...then the trailing partial buffer; `i` is left at the next slot. */
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdsb_size = p_buffer_size;
	}

	/*
	 * Push every storage unit onto the control page's free list.  Each
	 * unit is identified by a compact (buffer_index, offset) pair rather
	 * than a raw pointer, so the offset must fit in 16 bits.
	 */
	count_storage_units = 0;
	for (i = 0; i < kd_data_page->n_storage_buffer; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdsb_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdsb_addr;

		/* Classic stack push: each unit points at the previous list head. */
		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	/* Record the count actually threaded onto the free list. */
	kd_data_page->n_storage_units = count_storage_units;

	/* Every CPU starts with an empty storage-unit chain. */
	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdebug_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kds_inuse_count = 0;
out:
	if (error) {
		/* delete_buffers() tolerates partially-initialized state. */
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}
209 
/*
 * Tear down everything create_buffers() allocated: the storage buffers and
 * their descriptor array, the copy-out staging buffer, and the per-CPU
 * bookkeeping array.  Safe to call on partially-initialized state (it is
 * also create_buffers()' error path); freed fields are NULLed so a repeat
 * call is harmless.
 */
void
delete_buffers(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page)
{
	unsigned int i;
	int n_storage_buffer = kd_data_page->n_storage_buffer;

	struct kd_bufinfo *kdbip = kd_data_page->kdbip;
	struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		/* Free each storage buffer, then the descriptor array itself. */
		for (i = 0; i < n_storage_buffer; i++) {
			if (kd_bufs[i].kdsb_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
			}
		}
		kfree_type(struct kd_storage_buffers, n_storage_buffer, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->n_storage_buffer = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	/* The free list pointed into the buffers just freed; reset it. */
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page->kdebug_cpus);
		kd_data_page->kdbip = NULL;
	}
	kd_ctrl_page->kdebug_iops = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	/* Buffers are gone: clear the "initialized" flag so writers bail out. */
	kd_ctrl_page->kdebug_flags &= ~KDBG_BUFINIT;
}
245 
246 bool
allocate_storage_unit(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,int cpu)247 allocate_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu)
248 {
249 	union kds_ptr kdsp;
250 	struct kd_storage *kdsp_actual, *kdsp_next_actual;
251 	struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
252 	uint64_t oldest_ts, ts;
253 	bool retval = true;
254 	struct kd_storage_buffers *kd_bufs;
255 
256 	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
257 
258 	kdbp = &kd_data_page->kdbip[cpu];
259 	kd_bufs = kd_data_page->kd_bufs;
260 	kdbip = kd_data_page->kdbip;
261 
262 	/* If someone beat us to the allocate, return success */
263 	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
264 		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);
265 
266 		if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
267 			goto out;
268 		}
269 	}
270 
271 	if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
272 		/*
273 		 * If there's a free page, grab it from the free list.
274 		 */
275 		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
276 		kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;
277 
278 		kd_ctrl_page->kds_inuse_count++;
279 	} else {
280 		/*
281 		 * Otherwise, we're going to lose events and repurpose the oldest
282 		 * storage unit we can find.
283 		 */
284 		if (kd_ctrl_page->kdebug_flags & KDBG_NOWRAP) {
285 			kd_ctrl_page->kdebug_slowcheck |= SLOW_NOLOG;
286 			kdbp->kd_lostevents = true;
287 			retval = false;
288 			goto out;
289 		}
290 		kdbp_vict = NULL;
291 		oldest_ts = UINT64_MAX;
292 
293 		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
294 			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
295 				/*
296 				 * no storage unit to steal
297 				 */
298 				continue;
299 			}
300 
301 			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);
302 
303 			if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
304 				/*
305 				 * make sure we don't steal the storage unit
306 				 * being actively recorded to...  need to
307 				 * move on because we don't want an out-of-order
308 				 * set of events showing up later
309 				 */
310 				continue;
311 			}
312 
313 			/*
314 			 * When wrapping, steal the storage unit with the
315 			 * earliest timestamp on its last event, instead of the
316 			 * earliest timestamp on the first event.  This allows a
317 			 * storage unit with more recent events to be preserved,
318 			 * even if the storage unit contains events that are
319 			 * older than those found in other CPUs.
320 			 */
321 			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
322 			if (ts < oldest_ts) {
323 				oldest_ts = ts;
324 				kdbp_vict = kdbp_try;
325 			}
326 		}
327 		if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
328 			kdebug_enable = 0;
329 			kd_ctrl_page->enabled = 0;
330 			commpage_update_kdebug_state();
331 			retval = false;
332 			goto out;
333 		}
334 		kdsp = kdbp_vict->kd_list_head;
335 		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
336 		kdbp_vict->kd_list_head = kdsp_actual->kds_next;
337 
338 		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
339 			kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
340 			kdsp_next_actual->kds_lostevents = true;
341 		} else {
342 			kdbp_vict->kd_lostevents = true;
343 		}
344 
345 		if (kd_ctrl_page->oldest_time < oldest_ts) {
346 			kd_ctrl_page->oldest_time = oldest_ts;
347 		}
348 		kd_ctrl_page->kdebug_flags |= KDBG_WRAPPED;
349 	}
350 
351 	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
352 		kdsp_actual->kds_timestamp = kdebug_timestamp();
353 	} else {
354 		kdsp_actual->kds_timestamp = mach_continuous_time();
355 	}
356 
357 	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
358 	kdsp_actual->kds_bufcnt   = 0;
359 	kdsp_actual->kds_readlast = 0;
360 
361 	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
362 	kdbp->kd_lostevents = false;
363 	kdsp_actual->kds_bufindx = 0;
364 
365 	if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
366 		kdbp->kd_list_head = kdsp;
367 	} else {
368 		POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
369 	}
370 	kdbp->kd_list_tail = kdsp;
371 out:
372 	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
373 
374 	return retval;
375 }
376 
377 void
release_storage_unit(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,int cpu,uint32_t kdsp_raw)378 release_storage_unit(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, int cpu, uint32_t kdsp_raw)
379 {
380 	struct  kd_storage *kdsp_actual;
381 	struct kd_bufinfo *kdbp;
382 	union kds_ptr kdsp;
383 
384 	kdbp = &kd_data_page->kdbip[cpu];
385 
386 	kdsp.raw = kdsp_raw;
387 
388 	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
389 
390 	if (kdsp.raw == kdbp->kd_list_head.raw) {
391 		/*
392 		 * it's possible for the storage unit pointed to
393 		 * by kdsp to have already been stolen... so
394 		 * check to see if it's still the head of the list
395 		 * now that we're behind the lock that protects
396 		 * adding and removing from the queue...
397 		 * since we only ever release and steal units from
398 		 * that position, if it's no longer the head
399 		 * we having nothing to do in this context
400 		 */
401 		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
402 		kdbp->kd_list_head = kdsp_actual->kds_next;
403 
404 		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
405 		kd_ctrl_page->kds_free_list = kdsp;
406 
407 		kd_ctrl_page->kds_inuse_count--;
408 	}
409 
410 	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
411 }
412 
413 
414 /*
415  * Disable wrapping and return true if trace wrapped, false otherwise.
416  */
417 bool
disable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t * old_slowcheck,uint32_t * old_flags)418 disable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t *old_slowcheck, uint32_t *old_flags)
419 {
420 	bool wrapped;
421 	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
422 
423 	*old_slowcheck = kd_ctrl_page->kdebug_slowcheck;
424 	*old_flags = kd_ctrl_page->kdebug_flags;
425 
426 	wrapped = kd_ctrl_page->kdebug_flags & KDBG_WRAPPED;
427 	kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;
428 	kd_ctrl_page->kdebug_flags |= KDBG_NOWRAP;
429 
430 	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
431 
432 	return wrapped;
433 }
434 
435 void
enable_wrap(struct kd_ctrl_page_t * kd_ctrl_page,uint32_t old_slowcheck)436 enable_wrap(struct kd_ctrl_page_t *kd_ctrl_page, uint32_t old_slowcheck)
437 {
438 	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
439 
440 	kd_ctrl_page->kdebug_flags &= ~KDBG_NOWRAP;
441 
442 	if (!(old_slowcheck & SLOW_NOLOG)) {
443 		kd_ctrl_page->kdebug_slowcheck &= ~SLOW_NOLOG;
444 	}
445 	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
446 }
447 
/*
 * Record one event (kd_rec) into the per-CPU storage units of the given
 * control/data pages.  Lock-free on the fast path: a slot is claimed with
 * a compare-and-swap on the unit's buffer index; allocate_storage_unit()
 * (which takes the storage spin lock) is only called when the current unit
 * is full or absent.  Drops the event silently if tracing is disabled, the
 * timestamp predates oldest_time, or wrapping is disallowed and no unit is
 * available.  Runs with preemption disabled throughout.
 */
__attribute__((always_inline))
void
kernel_debug_write(struct kd_ctrl_page_t *kd_ctrl_page,
    struct kd_data_page_t *kd_data_page,
    struct kd_record      kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	/* A cpu of -1 means "record on the current CPU". */
	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdbip[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	/* A timestamp of -1 means "stamp with the current time" (below). */
	if (kd_rec.timestamp != -1) {
		/*
		 * Convert the caller-supplied timestamp to the clock domain
		 * currently in use, if it was captured in the other one.
		 */
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}

		/* Events older than the oldest merged/stolen time are dropped. */
		if (kd_rec.timestamp < kd_ctrl_page->oldest_time) {
			goto out;
		}

		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		/* Force the "unit full" path below to allocate a fresh unit. */
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * this can only happen if wrapping
			 * has been disabled
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and inits the buffer
		 * And, Intel uses a special 0 value as a early tracing timestamp sentinel
		 * to set the start of trace-time-start-of-interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			/* TRIAGE mode always stamps with the continuous clock. */
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	/* Claim slot bindx; on contention re-read the tail and try again. */
	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdebug_flags & KDBG_DEBUGID_64) {
		/*DebugID has been passed in arg 4*/
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5; /* thread ID */

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	/* Publish the completed record; readers poll kds_bufcnt. */
	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}
562 
563 /*
564  * This is a general purpose routine to read the kd_bufs from the storage units.
565  * So it is the responsibility of the caller to make sure they have acquired
566  * the right synchronization primitive so they can retrieve the data in the correct
567  * way.
568  */
569 int
kernel_debug_read(struct kd_ctrl_page_t * kd_ctrl_page,struct kd_data_page_t * kd_data_page,user_addr_t buffer,size_t * number,vnode_t vp,vfs_context_t ctx,uint32_t file_version)570 kernel_debug_read(struct kd_ctrl_page_t *kd_ctrl_page, struct kd_data_page_t *kd_data_page, user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
571 {
572 	size_t count;
573 	unsigned int cpu, min_cpu;
574 	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
575 	int error = 0;
576 	kd_buf *tempbuf;
577 	uint32_t rcursor;
578 	kd_buf lostevent;
579 	union kds_ptr kdsp;
580 	bool traced_retrograde = false;
581 	struct kd_storage *kdsp_actual;
582 	struct kd_bufinfo *kdbp;
583 	struct kd_bufinfo *min_kdbp;
584 	size_t tempbuf_count;
585 	uint32_t tempbuf_number;
586 	uint32_t old_kdebug_flags;
587 	uint32_t old_kdebug_slowcheck;
588 	bool out_of_events = false;
589 	bool wrapped = false;
590 	bool set_preempt = true;
591 	bool should_disable = false;
592 
593 	struct kd_bufinfo *kdbip = kd_data_page->kdbip;
594 	struct kd_storage_buffers *kd_bufs = kd_data_page->kd_bufs;
595 
596 	assert(number != NULL);
597 	count = *number / sizeof(kd_buf);
598 	*number = 0;
599 
600 	if (count == 0 || !(kd_ctrl_page->kdebug_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
601 		return EINVAL;
602 	}
603 
604 	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
605 		/*
606 		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS'
607 		 * and that can be handled by a callout thread that already
608 		 * has the eager-preemption set.
609 		 * So check to see if we are dealing with one such thread.
610 		 */
611 		set_preempt = !(thread_is_eager_preempt(current_thread()));
612 	}
613 
614 	if (set_preempt) {
615 		thread_set_eager_preempt(current_thread());
616 	}
617 
618 	memset(&lostevent, 0, sizeof(lostevent));
619 	lostevent.debugid = TRACE_LOST_EVENTS;
620 
621 	/*
622 	 * Capture the current time.  Only sort events that have occured
623 	 * before now.  Since the IOPs are being flushed here, it is possible
624 	 * that events occur on the AP while running live tracing.
625 	 */
626 	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
627 		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
628 	} else {
629 		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
630 	}
631 
632 	/*
633 	 * Disable wrap so storage units cannot be stolen out from underneath us
634 	 * while merging events.
635 	 *
636 	 * Because we hold ktrace_lock, no other control threads can be playing
637 	 * with kdebug_flags.  The code that emits new events could be running,
638 	 * but it grabs kds_spin_lock if it needs to acquire a new storage
639 	 * chunk, which is where it examines kdebug_flags.  If it is adding to
640 	 * the same chunk we're reading from, check for that below.
641 	 */
642 	wrapped = disable_wrap(kd_ctrl_page, &old_kdebug_slowcheck, &old_kdebug_flags);
643 
644 	if (count > kd_data_page->nkdbufs) {
645 		count = kd_data_page->nkdbufs;
646 	}
647 
648 	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
649 		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
650 	}
651 
652 	/*
653 	 * If the buffers have wrapped, do not emit additional lost events for the
654 	 * oldest storage units.
655 	 */
656 	if (wrapped) {
657 		kd_ctrl_page->kdebug_flags &= ~KDBG_WRAPPED;
658 
659 		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
660 			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
661 				continue;
662 			}
663 			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
664 			kdsp_actual->kds_lostevents = false;
665 		}
666 	}
667 
668 	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
669 		/*
670 		 * In TRIAGE mode we want to extract all the current
671 		 * records regardless of where we stopped reading last
672 		 * time so that we have the best shot at getting older
673 		 * records for threads before the buffers are wrapped.
674 		 * So set readlast to 0 to initiate the search from the
675 		 * 1st record.
676 		 */
677 		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
678 			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
679 				continue;
680 			}
681 			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
682 			kdsp_actual->kds_readlast = 0;
683 		}
684 	}
685 
686 	/*
687 	 * Capture the earliest time where there are events for all CPUs and don't
688 	 * emit events with timestamps prior.
689 	 */
690 	barrier_min = kd_ctrl_page->oldest_time;
691 
692 	while (count) {
693 		tempbuf = kd_data_page->kdcopybuf;
694 		tempbuf_number = 0;
695 
696 		if (wrapped) {
697 			/*
698 			 * Emit a lost events tracepoint to indicate that previous events
699 			 * were lost -- the thread map cannot be trusted.  A new one must
700 			 * be taken so tools can analyze the trace in a backwards-facing
701 			 * fashion.
702 			 */
703 			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
704 			*tempbuf = lostevent;
705 			wrapped = false;
706 			goto nextevent;
707 		}
708 
709 		/* While space left in merged events scratch buffer. */
710 		while (tempbuf_count) {
711 			bool lostevents = false;
712 			int lostcpu = 0;
713 			earliest_time = UINT64_MAX;
714 			min_kdbp = NULL;
715 			min_cpu = 0;
716 
717 			/* Check each CPU's buffers for the earliest event. */
718 			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
719 				/* Skip CPUs without data in their oldest storage unit. */
720 				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
721 next_cpu:
722 					continue;
723 				}
724 				/* From CPU data to buffer header to buffer. */
725 				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
726 
727 next_event:
728 				/* The next event to be read from this buffer. */
729 				rcursor = kdsp_actual->kds_readlast;
730 
731 				/* Skip this buffer if there are no events left. */
732 				if (rcursor == kdsp_actual->kds_bufindx) {
733 					continue;
734 				}
735 
736 				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
737 					/*
738 					 * TRIAGE mode record keeping doesn't (currently)
739 					 * use lostevent markers. It also doesn't want to
740 					 * call release_storage_unit() in this read call.
741 					 * It expects the buffers to wrap and records reclaimed
742 					 * in that way solely.
743 					 */
744 					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
745 					goto skip_record_checks;
746 				}
747 
748 				/*
749 				 * Check that this storage unit wasn't stolen and events were
750 				 * lost.  This must have happened while wrapping was disabled
751 				 * in this function.
752 				 */
753 				if (kdsp_actual->kds_lostevents) {
754 					lostevents = true;
755 					kdsp_actual->kds_lostevents = false;
756 
757 					/*
758 					 * The earliest event we can trust is the first one in this
759 					 * stolen storage unit.
760 					 */
761 					uint64_t lost_time =
762 					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
763 					if (kd_ctrl_page->oldest_time < lost_time) {
764 						/*
765 						 * If this is the first time we've seen lost events for
766 						 * this gap, record its timestamp as the oldest
767 						 * timestamp we're willing to merge for the lost events
768 						 * tracepoint.
769 						 */
770 						kd_ctrl_page->oldest_time = barrier_min = lost_time;
771 						lostcpu = cpu;
772 					}
773 				}
774 
775 				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
776 
777 				if (t > barrier_max) {
778 					if (kdbg_debug) {
779 						printf("kdebug: FUTURE EVENT: debugid %#8x: "
780 						    "time %lld from CPU %u "
781 						    "(barrier at time %lld, read %lu events)\n",
782 						    kdsp_actual->kds_records[rcursor].debugid,
783 						    t, cpu, barrier_max, *number + tempbuf_number);
784 					}
785 					goto next_cpu;
786 				}
787 				if (t < kdsp_actual->kds_timestamp) {
788 					/*
789 					 * This indicates the event emitter hasn't completed
790 					 * filling in the event (becuase we're looking at the
791 					 * buffer that the record head is using).  The max barrier
792 					 * timestamp should have saved us from seeing these kinds
793 					 * of things, but other CPUs might be slow on the up-take.
794 					 *
795 					 * Bail out so we don't get out-of-order events by
796 					 * continuing to read events from other CPUs' events.
797 					 */
798 					out_of_events = true;
799 					break;
800 				}
801 
802 				/*
803 				 * Ignore events that have aged out due to wrapping or storage
804 				 * unit exhaustion while merging events.
805 				 */
806 				if (t < barrier_min) {
807 					kdsp_actual->kds_readlast++;
808 					if (kdbg_debug) {
809 						printf("kdebug: PAST EVENT: debugid %#8x: "
810 						    "time %lld from CPU %u "
811 						    "(barrier at time %lld)\n",
812 						    kdsp_actual->kds_records[rcursor].debugid,
813 						    t, cpu, barrier_min);
814 					}
815 
816 					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
817 						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);
818 
819 						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
820 							goto next_cpu;
821 						}
822 						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
823 					}
824 					goto next_event;
825 				}
826 
827 				/*
828 				 * Don't worry about merging any events -- just walk through
829 				 * the CPUs and find the latest timestamp of lost events.
830 				 */
831 				if (lostevents) {
832 					continue;
833 				}
834 skip_record_checks:
835 				if (t < earliest_time) {
836 					earliest_time = t;
837 					min_kdbp = kdbp;
838 					min_cpu = cpu;
839 				}
840 			}
841 			if (lostevents) {
842 				/*
843 				 * If any lost events were hit in the buffers, emit an event
844 				 * with the latest timestamp.
845 				 */
846 				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
847 				*tempbuf = lostevent;
848 				tempbuf->arg1 = 1;
849 				goto nextevent;
850 			}
851 			if (min_kdbp == NULL) {
852 				/* All buffers ran empty. */
853 				out_of_events = true;
854 			}
855 			if (out_of_events) {
856 				break;
857 			}
858 
859 			kdsp = min_kdbp->kd_list_head;
860 			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
861 
862 			/* Copy earliest event into merged events scratch buffer. */
863 			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
864 			kd_buf *earliest_event = tempbuf;
865 			if (kd_ctrl_page_trace.kdebug_flags & KDBG_MATCH_DISABLE) {
866 				kd_event_matcher *match = &kd_ctrl_page_trace.disable_event_match;
867 				kd_event_matcher *mask = &kd_ctrl_page_trace.disable_event_mask;
868 				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
869 				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
870 				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
871 				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
872 				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
873 					should_disable = true;
874 				}
875 			}
876 
877 			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
878 				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
879 					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
880 				}
881 			}
882 
883 			/*
884 			 * Watch for out of order timestamps (from IOPs).
885 			 */
886 			if (earliest_time < min_kdbp->kd_prev_timebase) {
887 				/*
888 				 * If we haven't already, emit a retrograde events event.
889 				 * Otherwise, ignore this event.
890 				 */
891 				if (traced_retrograde) {
892 					continue;
893 				}
894 				if (kdbg_debug) {
895 					printf("kdebug: RETRO EVENT: debugid %#8x: "
896 					    "time %lld from CPU %u "
897 					    "(barrier at time %lld)\n",
898 					    kdsp_actual->kds_records[rcursor].debugid,
899 					    t, cpu, barrier_min);
900 				}
901 
902 				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
903 				tempbuf->arg1 = tempbuf->debugid;
904 				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
905 				tempbuf->arg3 = 0;
906 				tempbuf->arg4 = 0;
907 				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
908 				traced_retrograde = true;
909 			} else {
910 				min_kdbp->kd_prev_timebase = earliest_time;
911 			}
912 nextevent:
913 			tempbuf_count--;
914 			tempbuf_number++;
915 			tempbuf++;
916 
917 			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE && (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
918 				break;
919 			}
920 		}
921 
922 		if (tempbuf_number) {
923 			/*
924 			 * Remember the latest timestamp of events that we've merged so we
925 			 * don't think we've lost events later.
926 			 */
927 			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
928 			if (kd_ctrl_page->oldest_time < latest_time) {
929 				kd_ctrl_page->oldest_time = latest_time;
930 			}
931 
932 			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
933 				error = kernel_debug_trace_write_to_file(&buffer, number, &count, tempbuf_number, vp, ctx, file_version);
934 				if (error) {
935 					goto check_error;
936 				}
937 			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
938 				memcpy((void*)buffer, kd_data_page->kdcopybuf, tempbuf_number * sizeof(kd_buf));
939 
940 				buffer += (tempbuf_number * sizeof(kd_buf));
941 			} else {
942 				panic("kernel_debug_read: Invalid Mode");
943 			}
944 check_error:
945 			if (error) {
946 				*number = 0;
947 				error = EINVAL;
948 				break;
949 			}
950 			count   -= tempbuf_number;
951 			*number += tempbuf_number;
952 		}
953 		if (out_of_events) {
954 			/*
955 			 * all trace buffers are empty
956 			 */
957 			break;
958 		}
959 
960 		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
961 			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
962 		}
963 	}
964 	if (!(old_kdebug_flags & KDBG_NOWRAP)) {
965 		enable_wrap(kd_ctrl_page, old_kdebug_slowcheck);
966 	}
967 
968 	if (set_preempt) {
969 		thread_clear_eager_preempt(current_thread());
970 	}
971 
972 	if (should_disable) {
973 		kernel_debug_disable();
974 	}
975 
976 	return error;
977 }
978