xref: /xnu-8019.80.24/osfmk/kern/btlog.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2012 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <stddef.h>
30 #include <kern/btlog.h>
31 #include <kern/assert.h>
32 #include <kern/startup.h>
33 #include <vm/vm_kern.h>
34 #include <vm/vm_map.h>
35 #include <vm/pmap.h>
36 #include <mach/vm_param.h>
37 #define _SYS_TYPES_H_
38 #include <libkern/crypto/md5.h>
39 #include <libkern/crypto/crypto_internal.h>
40 
/*
 * Since all records are located contiguously in memory,
 * we use indices to them as the primary lookup mechanism,
 * and to maintain the linked list of active records
 * in chronological order.
 */
#define BTLOG_MAX_RECORDS (0xFFFFFF /* 16777215 */ )
#define BTLOG_RECORDINDEX_NONE (0xFFFFFF)

/*
 * Each record is a stack with a reference count and a list of
 * log elements that refer to it.
 *
 * Each log element is placed in a hash bucket that is contained
 * within the btlog structure. It contains the index to the record
 * that it references.
 *
 * So you can go from an address to the corresp. stack by hashing the address,
 * finding the hash head and traversing the chain of log elements
 * till you find the hash bucket with an address that matches your
 * address (if it exists) or creating a new bucket to hold this new address.
 */

#define ELEMENT_HASH_BUCKET_COUNT (256)
/* Sentinel used both as chain terminator and as freelist end marker for elements. */
#define BTLOG_HASHELEMINDEX_NONE BTLOG_RECORDINDEX_NONE

#define ZELEMS_DEFAULT  (8000)
/* Number of btlog_element_t's per btlog; computed once in btlog_create
 * (scaled by physical memory, overridable via the "zelems" boot-arg). */
size_t  zelems_count = 0;

typedef uint32_t btlog_recordindex_t; /* only 24 bits used */

/*
 * Queue head for the queue of elements connected to a particular record (stack).
 * For quick removal of the oldest element referencing the least popular stack. Useful for LEAKS mode.
 */
TAILQ_HEAD(_element_record_queue, btlog_element);

/*
 * Queue head for the queue of elements that hash to the same bucket.
 * For quick removal of the oldest element ever logged.  Useful for CORRUPTION mode where we use only bucket i.e. FIFO.
 */
TAILQ_HEAD(_element_hash_queue, btlog_element);
83 
/*
 * One unique backtrace (stack). Variable-sized: bt[] holds
 * btlog->btrecord_btdepth saved frame pointers.
 */
typedef struct btlog_record {
	btlog_recordindex_t next:24,  /* next record on the active list or the record freelist */
	    operation:8;              /* operation tag; shared by all elements that logged this stack */
	uint32_t            ref_count; /* number of live elements referencing this record */
	uint32_t            bthash;    /* first 32 bits of the MD5 of the stack (see btlog_add_entry) */
	struct _element_record_queue        element_record_queue; /* elements that logged this stack, newest at head */
	void                *bt[];/* variable sized, based on btlog_t params */
} btlog_record_t;

/*
 * One logged (address, operation) event. The address is stored
 * one's-complemented (see btlog_add_entry / '~' usages below).
 */
typedef struct btlog_element {
	btlog_recordindex_t     recindex:24, /* index of the record (stack) this element refers to */
	    operation:8;                     /* operation tag copied from the record */
	uintptr_t elem;                      /* complement (~) of the logged address */
	TAILQ_ENTRY(btlog_element) element_record_link; /* Links to other elements pointing to the same stack. */

	TAILQ_ENTRY(btlog_element) element_hash_link; /* Links to other elements in the same hash chain.
	                                               * During LEAKS mode, this is used as a singly-linked list because
	                                               * we don't want to initialize ELEMENT_HASH_BUCKET_COUNT heads.
	                                               *
	                                               * During CORRUPTION mode with a single hash chain, this is used as a doubly-linked list.
	                                               */
} btlog_element_t;
106 
/*
 * The btlog itself. The btlog_t header sits at the start of one
 * contiguous allocation; the record array follows immediately after
 * (see btrecords and the lookup_btrecord() macro).
 */
struct btlog {
	vm_address_t    btlog_buffer;   /* all memory for this btlog_t */
	vm_size_t       btlog_buffersize;

	uintptr_t       btrecords;  /* use btlog_recordindex_t to lookup */
	size_t          btrecord_btdepth;/* BT entries per record */
	size_t          btrecord_size;   /* bytes per record (header + btrecord_btdepth pointers) */

	btlog_recordindex_t head; /* active record list */
	btlog_recordindex_t tail; /* last record on the active list; appends happen here */
	btlog_recordindex_t activerecord; /* record pinned against eviction while btlog_add_entry runs */
	btlog_recordindex_t freelist_records; /* head of the singly-linked record freelist */

	size_t              active_record_count;
	size_t              active_element_count;
	btlog_element_t     *freelist_elements; /* free elements, chained through the first word of each */
	union {
		btlog_element_t                 **elem_recindex_hashtbl; /* LEAKS mode: We use an array of ELEMENT_HASH_BUCKET_COUNT buckets. */
		struct _element_hash_queue      *element_hash_queue; /* CORRUPTION mode: We use a single hash bucket i.e. queue */
	} elem_linkage_un;

	decl_simple_lock_data(, btlog_lock);
	boolean_t   caller_will_remove_entries_for_element;/* If TRUE, this means that the caller is interested in keeping track of abandoned / leaked elements.
	                                                    * And so they want to be in charge of explicitly removing elements. Depending on this variable we
	                                                    * will choose what kind of data structure to use for the elem_linkage_un union above.
	                                                    */
	boolean_t   kmem;            /* If TRUE, btlog_create allocated memory from the kernel_map */
	vm_offset_t freelist_buffer; /* Freelist memory. Static for btlog's lifetime. */
};
136 
/*
 * Convert a record index into a pointer: records are laid out
 * back-to-back right after the btlog_t header, each btrecord_size bytes.
 * No bounds check — callers must not dereference the result computed
 * from BTLOG_RECORDINDEX_NONE.
 */
#define lookup_btrecord(btlog, index) \
	((btlog_record_t *)(btlog->btrecords + index * btlog->btrecord_size))

uint32_t calculate_hashidx_for_element(uintptr_t elem, btlog_t *btlog);
uint32_t lookup_btrecord_byhash(btlog_t *btlog, uint32_t md5_hash, void *bt[], size_t btcount);

void btlog_add_elem_to_freelist(btlog_t *btlog, btlog_element_t *hash_elem);
btlog_element_t* btlog_get_elem_from_freelist(btlog_t *btlog);
145 
146 uint32_t
lookup_btrecord_byhash(btlog_t * btlog,uint32_t md5_hash,void * bt[],size_t btcount)147 lookup_btrecord_byhash(btlog_t *btlog, uint32_t md5_hash, void *bt[], size_t btcount)
148 {
149 	btlog_recordindex_t     recindex = BTLOG_RECORDINDEX_NONE;
150 	btlog_record_t          *record = NULL;
151 	size_t                  i = 0;
152 	boolean_t               stack_matched = TRUE;
153 
154 	assert(btcount);
155 	assert(bt);
156 
157 	recindex = btlog->head;
158 	record = lookup_btrecord(btlog, recindex);
159 	while (recindex != BTLOG_RECORDINDEX_NONE) {
160 		assert(!TAILQ_EMPTY(&record->element_record_queue));
161 		if (record->bthash == md5_hash) {
162 			/*
163 			 * Make sure that the incoming stack actually matches the
164 			 * stack in this record. Since we only save off a
165 			 * part of the md5 hash there can be collisions sometimes.
166 			 * This comparison isn't costly because, in case of collisions,
167 			 * usually the first few frames are different.
168 			 */
169 
170 			stack_matched = TRUE;
171 
172 			if (btcount < btlog->btrecord_btdepth) {
173 				if (record->bt[btcount] != NULL) {
174 					/*
175 					 * If the stack depth passed in is smaller than
176 					 * the recorded stack and we have a valid pointer
177 					 * in the recorded stack at that depth, then we
178 					 * don't need to do any further checks.
179 					 */
180 					stack_matched = FALSE;
181 					goto next;
182 				}
183 			}
184 
185 			for (i = 0; i < MIN(btcount, btlog->btrecord_btdepth); i++) {
186 				if (record->bt[i] != bt[i]) {
187 					stack_matched = FALSE;
188 					goto next;
189 				}
190 			}
191 
192 			if (stack_matched == TRUE) {
193 				break;
194 			}
195 		}
196 next:
197 		recindex = record->next;
198 		record = lookup_btrecord(btlog, recindex);
199 	}
200 
201 	return recindex;
202 }
203 
204 uint32_t
calculate_hashidx_for_element(uintptr_t elem,btlog_t * btlog)205 calculate_hashidx_for_element(uintptr_t elem, btlog_t *btlog)
206 {
207 	if (btlog->caller_will_remove_entries_for_element) {
208 		uint32_t addr = 0;
209 
210 		addr = (uint32_t) ((elem & 0xFF00) >> 0x8);
211 
212 		return addr;
213 	} else {
214 		return 0;
215 	}
216 }
217 
/* Take the btlog spinlock; all list and freelist mutation happens under it. */
static void
btlog_lock(btlog_t *btlog)
{
	simple_lock(&btlog->btlog_lock, LCK_GRP_NULL);
}
/* Release the btlog spinlock. */
static void
btlog_unlock(btlog_t *btlog)
{
	simple_unlock(&btlog->btlog_lock);
}
228 
229 btlog_t *
btlog_create(size_t numrecords,size_t record_btdepth,boolean_t caller_will_remove_entries_for_element)230 btlog_create(size_t numrecords,
231     size_t record_btdepth,
232     boolean_t caller_will_remove_entries_for_element)
233 {
234 	btlog_t *btlog;
235 	vm_size_t buffersize_needed = 0, elemsize_needed = 0;
236 	vm_address_t buffer = 0, elem_buffer = 0, elem_hash_buffer = 0;
237 	size_t i = 0;
238 	kern_return_t ret;
239 	size_t btrecord_size = 0;
240 	uintptr_t free_elem = 0, next_free_elem = 0;
241 	boolean_t kmem = FALSE;
242 
243 	if (startup_phase >= STARTUP_SUB_VM_KERNEL &&
244 	    startup_phase < STARTUP_SUB_KMEM_ALLOC) {
245 		return NULL;
246 	}
247 
248 	if (numrecords > BTLOG_MAX_RECORDS) {
249 		return NULL;
250 	}
251 
252 	if (numrecords == 0) {
253 		return NULL;
254 	}
255 
256 	if (record_btdepth > BTLOG_MAX_DEPTH) {
257 		return NULL;
258 	}
259 
260 	/* btlog_record_t is variable-sized, calculate needs now */
261 	btrecord_size = sizeof(btlog_record_t)
262 	    + sizeof(void *) * record_btdepth;
263 
264 	buffersize_needed = sizeof(btlog_t) + numrecords * btrecord_size;
265 	buffersize_needed = round_page(buffersize_needed);
266 
267 	if (zelems_count == 0) {
268 		zelems_count = ((max_mem + (1024 * 1024 * 1024) /*GB*/) >> 30) * ZELEMS_DEFAULT;
269 
270 		if (PE_parse_boot_argn("zelems", &zelems_count, sizeof(zelems_count)) == TRUE) {
271 			/*
272 			 * Need a max? With this scheme, it should be possible to tune the default
273 			 * so that we don't need a boot-arg to request more elements.
274 			 */
275 			printf("Set number of log elements per btlog to: %ld\n", zelems_count);
276 		}
277 	}
278 	elemsize_needed = sizeof(btlog_element_t) * zelems_count;
279 	elemsize_needed = round_page(elemsize_needed);
280 
281 	/* since rounding to a page size might hold more, recalculate */
282 	numrecords = MIN(BTLOG_MAX_RECORDS,
283 	    (buffersize_needed - sizeof(btlog_t)) / btrecord_size);
284 
285 	if (__probable(startup_phase >= STARTUP_SUB_KMEM_ALLOC)) {
286 		kmem = TRUE;
287 
288 		ret = kmem_alloc(kernel_map, &buffer, buffersize_needed, VM_KERN_MEMORY_DIAG);
289 		if (ret != KERN_SUCCESS) {
290 			return NULL;
291 		}
292 
293 		ret = kmem_alloc(kernel_map, &elem_buffer, elemsize_needed, VM_KERN_MEMORY_DIAG);
294 		if (ret != KERN_SUCCESS) {
295 			kmem_free(kernel_map, buffer, buffersize_needed);
296 			buffer = 0;
297 			return NULL;
298 		}
299 
300 		if (caller_will_remove_entries_for_element == TRUE) {
301 			ret = kmem_alloc(kernel_map, &elem_hash_buffer, ELEMENT_HASH_BUCKET_COUNT * sizeof(btlog_element_t*), VM_KERN_MEMORY_DIAG);
302 		} else {
303 			ret = kmem_alloc(kernel_map, &elem_hash_buffer, 2 * sizeof(btlog_element_t*), VM_KERN_MEMORY_DIAG);
304 		}
305 
306 		if (ret != KERN_SUCCESS) {
307 			kmem_free(kernel_map, buffer, buffersize_needed);
308 			buffer = 0;
309 
310 			kmem_free(kernel_map, elem_buffer, elemsize_needed);
311 			elem_buffer = 0;
312 			return NULL;
313 		}
314 	} else {
315 		buffer = (vm_address_t)pmap_steal_memory(buffersize_needed);
316 		elem_buffer = (vm_address_t)pmap_steal_memory(elemsize_needed);
317 		if (caller_will_remove_entries_for_element == TRUE) {
318 			elem_hash_buffer = (vm_address_t)pmap_steal_memory(ELEMENT_HASH_BUCKET_COUNT * sizeof(btlog_element_t*));
319 		} else {
320 			elem_hash_buffer = (vm_address_t)pmap_steal_memory(2 * sizeof(btlog_element_t*));
321 		}
322 		ret = KERN_SUCCESS;
323 	}
324 
325 	btlog = (btlog_t *)buffer;
326 	btlog->btlog_buffer = buffer;
327 	btlog->btlog_buffersize = buffersize_needed;
328 	btlog->freelist_elements = (btlog_element_t *)elem_buffer;
329 	btlog->freelist_buffer = (vm_offset_t)elem_buffer;
330 
331 	simple_lock_init(&btlog->btlog_lock, 0);
332 
333 	btlog->caller_will_remove_entries_for_element = caller_will_remove_entries_for_element;
334 
335 	if (caller_will_remove_entries_for_element == TRUE) {
336 		btlog->elem_linkage_un.elem_recindex_hashtbl = (btlog_element_t **)elem_hash_buffer;
337 	} else {
338 		btlog->elem_linkage_un.element_hash_queue = (struct _element_hash_queue*) elem_hash_buffer;
339 		TAILQ_INIT(btlog->elem_linkage_un.element_hash_queue);
340 	}
341 
342 	btlog->btrecords = (uintptr_t)(buffer + sizeof(btlog_t));
343 	btlog->btrecord_btdepth = record_btdepth;
344 	btlog->btrecord_size = btrecord_size;
345 
346 	btlog->head = BTLOG_RECORDINDEX_NONE;
347 	btlog->tail = BTLOG_RECORDINDEX_NONE;
348 	btlog->active_record_count = 0;
349 	btlog->activerecord = BTLOG_RECORDINDEX_NONE;
350 
351 	for (i = 0; i < ELEMENT_HASH_BUCKET_COUNT; i++) {
352 		btlog->elem_linkage_un.elem_recindex_hashtbl[i] = 0;
353 	}
354 
355 	/* populate freelist_records with all records in order */
356 	btlog->freelist_records = 0;
357 	for (i = 0; i < (numrecords - 1); i++) {
358 		btlog_record_t *rec = lookup_btrecord(btlog, i);
359 		rec->next = (btlog_recordindex_t)(i + 1);
360 	}
361 	lookup_btrecord(btlog, i)->next = BTLOG_RECORDINDEX_NONE; /* terminate */
362 
363 	/* populate freelist_elements with all elements in order */
364 	free_elem = (uintptr_t)btlog->freelist_elements;
365 
366 	for (i = 0; i < (zelems_count - 1); i++) {
367 		next_free_elem = free_elem + sizeof(btlog_element_t);
368 		*(uintptr_t*)free_elem = next_free_elem;
369 		free_elem = next_free_elem;
370 	}
371 	*(uintptr_t*)next_free_elem = BTLOG_HASHELEMINDEX_NONE;
372 
373 	btlog->kmem = kmem;
374 
375 	return btlog;
376 }
377 
378 void
btlog_destroy(btlog_t * btlog)379 btlog_destroy(btlog_t *btlog)
380 {
381 	vm_size_t elemsize_needed = 0;
382 
383 	if (btlog->kmem == FALSE) {
384 		return;
385 	}
386 
387 	if (btlog->caller_will_remove_entries_for_element) {
388 		kmem_free(kernel_map, (vm_offset_t)btlog->elem_linkage_un.elem_recindex_hashtbl,
389 		    ELEMENT_HASH_BUCKET_COUNT * sizeof(btlog_element_t *));
390 	} else {
391 		kmem_free(kernel_map, (vm_offset_t)btlog->elem_linkage_un.element_hash_queue,
392 		    2 * sizeof(btlog_element_t*));
393 	}
394 
395 	elemsize_needed = sizeof(btlog_element_t) * zelems_count;
396 	elemsize_needed = round_page(elemsize_needed);
397 	kmem_free(kernel_map, btlog->freelist_buffer, elemsize_needed);
398 
399 	kmem_free(kernel_map, (vm_offset_t)btlog, btlog->btlog_buffersize);
400 }
401 
402 /* Assumes btlog is already locked */
403 static btlog_recordindex_t
btlog_get_record_from_freelist(btlog_t * btlog)404 btlog_get_record_from_freelist(btlog_t *btlog)
405 {
406 	btlog_recordindex_t     recindex = btlog->freelist_records;
407 
408 	if (recindex == BTLOG_RECORDINDEX_NONE) {
409 		/* nothing on freelist */
410 		return BTLOG_RECORDINDEX_NONE;
411 	} else {
412 		/* remove the head of the freelist_records */
413 		btlog_record_t *record = lookup_btrecord(btlog, recindex);
414 		btlog->freelist_records = record->next;
415 		return recindex;
416 	}
417 }
418 
/*
 * Unlink 'recindex' from the active list and push it onto the record
 * freelist. Assumes the btlog lock is held and the record's element
 * queue is already empty (asserted). O(n): the active list is singly
 * linked, so freeing a non-head record requires a predecessor walk.
 */
static void
btlog_add_record_to_freelist(btlog_t *btlog, btlog_recordindex_t recindex)
{
	btlog_recordindex_t precindex = BTLOG_RECORDINDEX_NONE;
	btlog_record_t *precord = NULL, *record = NULL;

	record = lookup_btrecord(btlog, recindex);

	assert(TAILQ_EMPTY(&record->element_record_queue));

	record->bthash = 0;

	precindex = btlog->head;
	precord = lookup_btrecord(btlog, precindex);

	if (precindex == recindex) {
		/* Record is the list head: simple unlink. */
		btlog->head = precord->next;
		btlog->active_record_count--;

		record->next = btlog->freelist_records;
		btlog->freelist_records = recindex;

		if (btlog->head == BTLOG_RECORDINDEX_NONE) {
			/* active list is now empty, update tail */
			btlog->tail = BTLOG_RECORDINDEX_NONE;
			assert(btlog->active_record_count == 0);
		}
	} else {
		/* Walk the active list to find the predecessor of 'recindex'. */
		while (precindex != BTLOG_RECORDINDEX_NONE) {
			if (precord->next == recindex) {
				precord->next = record->next;
				btlog->active_record_count--;

				record->next = btlog->freelist_records;
				btlog->freelist_records = recindex;

				/* If we removed the tail, the predecessor becomes the tail. */
				if (btlog->tail == recindex) {
					btlog->tail = precindex;
				}
				break;
			} else {
				precindex = precord->next;
				precord = lookup_btrecord(btlog, precindex);
			}
		}
	}
}
466 
467 
/* Assumes btlog is already locked */
/*
 * Reclaim 'num_elements_to_evict' elements (and any record whose
 * ref_count drops to zero) to refill the freelists.
 *
 * LEAKS mode: evict from the record with the fewest references, oldest
 * element first, skipping the record pinned in btlog->activerecord.
 * CORRUPTION mode: evict the oldest elements overall, i.e. from the
 * tail of the single FIFO hash queue.
 */
static void
btlog_evict_elements_from_record(btlog_t *btlog, int num_elements_to_evict)
{
	btlog_recordindex_t     recindex = btlog->head;
	btlog_record_t          *record = NULL;
	btlog_element_t         *recelem = NULL;

	if (recindex == BTLOG_RECORDINDEX_NONE) {
		/* nothing on active list */
		panic("BTLog: Eviction requested on btlog (0x%lx) with an empty active list.", (uintptr_t) btlog);
	} else {
		while (num_elements_to_evict) {
			/*
			 * LEAKS: reap the oldest element within the record with the lowest refs.
			 * CORRUPTION: reap the oldest element overall and drop its reference on the record
			 */

			if (btlog->caller_will_remove_entries_for_element) {
				uint32_t                max_refs_threshold = UINT32_MAX;
				btlog_recordindex_t     precindex = 0, prev_evictindex = 0, evict_index = 0;

				prev_evictindex = evict_index = btlog->head;
				precindex = recindex = btlog->head;

				/* Scan the whole active list for the least-referenced record. */
				while (recindex != BTLOG_RECORDINDEX_NONE) {
					record  = lookup_btrecord(btlog, recindex);

					if (btlog->activerecord == recindex || record->ref_count > max_refs_threshold) {
						/* skip this record */
					} else {
						prev_evictindex = precindex;
						evict_index = recindex;
						max_refs_threshold = record->ref_count;
					}

					if (record->next != BTLOG_RECORDINDEX_NONE) {
						precindex = recindex;
					}

					recindex = record->next;
				}

				recindex = evict_index;
				assert(recindex != BTLOG_RECORDINDEX_NONE);
				record  = lookup_btrecord(btlog, recindex);

				/* Oldest element sits at the tail (inserts happen at the head). */
				recelem = TAILQ_LAST(&record->element_record_queue, _element_record_queue);
			} else {
				recelem = TAILQ_LAST(btlog->elem_linkage_un.element_hash_queue, _element_hash_queue);
				recindex = recelem->recindex;
				record = lookup_btrecord(btlog, recindex);
			}

			/*
			 * Here we have the element to drop (recelem), its record and the record index.
			 */

			while (recelem && num_elements_to_evict) {
				TAILQ_REMOVE(&record->element_record_queue, recelem, element_record_link);

				if (btlog->caller_will_remove_entries_for_element) {
					btlog_element_t *prev_hashelem = NULL, *hashelem = NULL;
					uint32_t                        hashidx = 0;

					/* elem is stored complemented; undo that before hashing. */
					hashidx = calculate_hashidx_for_element(~recelem->elem, btlog);

					/* Unlink recelem from its singly-linked hash chain. */
					prev_hashelem = hashelem = btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx];
					while (hashelem != NULL) {
						if (hashelem == recelem) {
							break;
						} else {
							prev_hashelem = hashelem;
							hashelem = TAILQ_NEXT(hashelem, element_hash_link);
						}
					}

					if (hashelem == NULL) {
						panic("BTLog: Missing hashelem for element list of record 0x%lx", (uintptr_t) record);
					}

					if (prev_hashelem != hashelem) {
						TAILQ_NEXT(prev_hashelem, element_hash_link) = TAILQ_NEXT(hashelem, element_hash_link);
					} else {
						/* recelem was the chain head. */
						btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx] = TAILQ_NEXT(hashelem, element_hash_link);
					}
				} else {
					TAILQ_REMOVE(btlog->elem_linkage_un.element_hash_queue, recelem, element_hash_link);
				}

				btlog_add_elem_to_freelist(btlog, recelem);
				btlog->active_element_count--;

				num_elements_to_evict--;

				assert(record->ref_count);

				record->ref_count--;

				if (record->ref_count == 0) {
					/* Last element gone: recycle the record itself. */
					btlog_add_record_to_freelist(btlog, recindex);

					/*
					 * LEAKS: All done with this record. Need the next least popular record.
					 * CORRUPTION: We don't care about records. We'll just pick the next oldest element.
					 */

					if (btlog->caller_will_remove_entries_for_element) {
						break;
					}
				}

				if (btlog->caller_will_remove_entries_for_element) {
					recelem = TAILQ_LAST(&record->element_record_queue, _element_record_queue);
				} else {
					recelem = TAILQ_LAST(btlog->elem_linkage_un.element_hash_queue, _element_hash_queue);
					recindex = recelem->recindex;
					record = lookup_btrecord(btlog, recindex);
				}
			}
		}
	}
}
591 
592 /* Assumes btlog is already locked */
593 static void
btlog_append_record_to_activelist(btlog_t * btlog,btlog_recordindex_t recindex)594 btlog_append_record_to_activelist(btlog_t *btlog, btlog_recordindex_t recindex)
595 {
596 	assert(recindex != BTLOG_RECORDINDEX_NONE);
597 
598 	if (btlog->head == BTLOG_RECORDINDEX_NONE) {
599 		/* empty active list, update both head and tail */
600 		btlog->head = btlog->tail = recindex;
601 	} else {
602 		btlog_record_t *record = lookup_btrecord(btlog, btlog->tail);
603 		record->next = recindex;
604 		btlog->tail = recindex;
605 	}
606 	btlog->active_record_count++;
607 }
608 
609 btlog_element_t*
btlog_get_elem_from_freelist(btlog_t * btlog)610 btlog_get_elem_from_freelist(btlog_t *btlog)
611 {
612 	btlog_element_t *free_elem = NULL;
613 
614 retry:
615 	free_elem = btlog->freelist_elements;
616 
617 	if ((uintptr_t)free_elem == BTLOG_HASHELEMINDEX_NONE) {
618 		/* nothing on freelist */
619 		btlog_evict_elements_from_record(btlog, 1);
620 		goto retry;
621 	} else {
622 		/* remove the head of the freelist */
623 		uintptr_t next_elem = *(uintptr_t*)free_elem;
624 		btlog->freelist_elements = (btlog_element_t *)next_elem;
625 		return free_elem;
626 	}
627 }
628 
629 void
btlog_add_elem_to_freelist(btlog_t * btlog,btlog_element_t * elem)630 btlog_add_elem_to_freelist(btlog_t *btlog, btlog_element_t *elem)
631 {
632 	btlog_element_t *free_elem = btlog->freelist_elements;
633 
634 	TAILQ_NEXT(elem, element_hash_link) = (btlog_element_t *) BTLOG_HASHELEMINDEX_NONE;
635 	TAILQ_NEXT(elem, element_record_link) = (btlog_element_t *) BTLOG_HASHELEMINDEX_NONE;
636 
637 	*(uintptr_t*)elem = (uintptr_t)free_elem;
638 	btlog->freelist_elements = elem;
639 }
640 
/*
 * Log one (element, operation, backtrace) event.
 *
 * The backtrace is hashed (truncated MD5) to find an existing record to
 * share; otherwise a record is taken from the freelist, evicting old
 * entries if necessary. A new element referencing the record is then
 * linked into the appropriate hash structure for the log's mode.
 */
void
btlog_add_entry(btlog_t *btlog,
    void *element,
    uint8_t operation,
    void *bt[],
    size_t btcount)
{
	btlog_recordindex_t     recindex = 0;
	btlog_record_t          *record = NULL;
	size_t                  i;
	u_int32_t               md5_buffer[4];
	MD5_CTX                 btlog_ctx;
	uint32_t                hashidx = 0;

	btlog_element_t *hashelem = NULL;

	/* MD5 goes through the registered crypto functions; bail if not up yet. */
	if (g_crypto_funcs == NULL) {
		return;
	}

	/* Hash the frame pointers (up to the record depth) to identify the stack. */
	MD5Init(&btlog_ctx);
	for (i = 0; i < MIN(btcount, btlog->btrecord_btdepth); i++) {
		MD5Update(&btlog_ctx, (u_char *) &bt[i], sizeof(bt[i]));
	}
	MD5Final((u_char *) &md5_buffer, &btlog_ctx);

	btlog_lock(btlog);
	/* Only the first 32 bits of the MD5 are used; collisions are resolved
	 * inside lookup_btrecord_byhash by comparing the actual frames. */
	recindex = lookup_btrecord_byhash(btlog, md5_buffer[0], bt, btcount);

	if (recindex != BTLOG_RECORDINDEX_NONE) {
		/* Existing record for this stack: just take another reference. */
		record = lookup_btrecord(btlog, recindex);
		record->ref_count++;
		assert(record->operation == operation);
	} else {
retry:
		/* If there's a free record, use it */
		recindex = btlog_get_record_from_freelist(btlog);
		if (recindex == BTLOG_RECORDINDEX_NONE) {
			/* Use the first active record (FIFO age-out) */
			btlog_evict_elements_from_record(btlog, ((2 * sizeof(btlog_record_t)) / sizeof(btlog_element_t)));
			goto retry;
		}

		record = lookup_btrecord(btlog, recindex);

		/* we always add to the tail, so there is no next pointer */
		record->next = BTLOG_RECORDINDEX_NONE;
		record->operation = operation;
		record->bthash = md5_buffer[0];
		record->ref_count = 1;
		TAILQ_INIT(&record->element_record_queue);

		/* Copy the stack; NULL-pad any unused trailing frames. */
		for (i = 0; i < MIN(btcount, btlog->btrecord_btdepth); i++) {
			record->bt[i] = bt[i];
		}

		for (; i < btlog->btrecord_btdepth; i++) {
			record->bt[i] = NULL;
		}

		btlog_append_record_to_activelist(btlog, recindex);
	}

	/* Pin this record: eviction (possibly triggered below when grabbing a
	 * free element) skips btlog->activerecord. */
	btlog->activerecord = recindex;

	hashidx = calculate_hashidx_for_element((uintptr_t)element, btlog);
	hashelem = btlog_get_elem_from_freelist(btlog);

	/* The address is stored complemented; presumably so it doesn't read as
	 * a live pointer during scans — TODO confirm. Lookups undo the ~. */
	hashelem->elem = ~((uintptr_t)element);
	hashelem->operation = record->operation;
	hashelem->recindex = recindex;

	TAILQ_INSERT_HEAD(&record->element_record_queue, hashelem, element_record_link);

	if (btlog->caller_will_remove_entries_for_element) {
		/* LEAKS mode: push onto the singly-linked chain of this bucket. */
		TAILQ_NEXT(hashelem, element_hash_link) = btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx];
		btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx] = hashelem;
	} else {
		/* CORRUPTION mode: newest at the head of the single FIFO queue. */
		TAILQ_INSERT_HEAD(btlog->elem_linkage_un.element_hash_queue, hashelem, element_hash_link);
	}

	btlog->active_element_count++;

	/* Unpin the record now that the element is fully linked. */
	btlog->activerecord = BTLOG_RECORDINDEX_NONE;

	btlog_unlock(btlog);
}
728 
/*
 * Remove the logged entry for 'element' (LEAKS mode only): unlink its
 * hash element, return it to the freelist, and drop its reference on
 * the backing record, recycling the record when the refcount hits zero.
 * Panics if called on a CORRUPTION-mode btlog. Silently does nothing if
 * the element was never logged.
 */
void
btlog_remove_entries_for_element(btlog_t *btlog,
    void *element)
{
	btlog_recordindex_t     recindex = BTLOG_RECORDINDEX_NONE;
	btlog_record_t          *record = NULL;
	uint32_t                hashidx = 0;

	btlog_element_t *prev_hashelem = NULL, *hashelem = NULL;

	if (btlog->caller_will_remove_entries_for_element == FALSE) {
		panic("Explicit removal of entry is not permitted for this btlog (%p).", btlog);
	}

	/* Mirrors btlog_add_entry: if crypto was down, nothing was ever logged. */
	if (g_crypto_funcs == NULL) {
		return;
	}

	btlog_lock(btlog);

	/* Walk the bucket chain; stored addresses are complemented (~). */
	hashidx = calculate_hashidx_for_element((uintptr_t) element, btlog);
	prev_hashelem = hashelem = btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx];

	while (hashelem != NULL) {
		if (~hashelem->elem == (uintptr_t)element) {
			break;
		} else {
			prev_hashelem = hashelem;
			hashelem = TAILQ_NEXT(hashelem, element_hash_link);
		}
	}

	if (hashelem) {
		btlog_element_t *recelem = NULL;

		/* Unlink from the singly-linked hash chain (head vs interior case). */
		if (prev_hashelem != hashelem) {
			TAILQ_NEXT(prev_hashelem, element_hash_link) = TAILQ_NEXT(hashelem, element_hash_link);
		} else {
			btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx] = TAILQ_NEXT(hashelem, element_hash_link);
		}

		recindex = hashelem->recindex;
		record = lookup_btrecord(btlog, recindex);

		/* Also unlink from the record's own element queue. */
		recelem = hashelem;
		TAILQ_REMOVE(&record->element_record_queue, recelem, element_record_link);

		btlog_add_elem_to_freelist(btlog, hashelem);
		btlog->active_element_count--;

		assert(record->ref_count);

		record->ref_count--;

		if (record->ref_count == 0) {
			btlog_add_record_to_freelist(btlog, recindex);
		}
	}

	btlog_unlock(btlog);
}
790 
791 #if DEBUG || DEVELOPMENT
792 
/*
 * For each zone element address in 'instances', substitute the address
 * of its btlog record ("site"), tagged with kInstanceFlagReferenced;
 * then de-duplicate the sites and invoke 'proc' once per unique site
 * with its occurrence count and saved backtrace. On return, *countp is
 * the number of surviving entries in 'instances'.
 * (kInstanceFlags / INSTANCE_PUT encode and decode flag bits within the
 * pointer values; they are defined elsewhere in the zone-leak code.)
 */
void
btlog_copy_backtraces_for_elements(btlog_t      * btlog,
    uintptr_t    * instances,
    uint32_t     * countp,
    uint32_t       zoneSize,
    leak_site_proc proc,
    void         * refCon)
{
	btlog_recordindex_t       recindex;
	btlog_record_t          * record;
	btlog_element_t     * hashelem;
	uint32_t                      hashidx, idx, dups, numSites, siteCount;
	uintptr_t             element, site;
	uint32_t              count;

	btlog_lock(btlog);

	/* Pass 1: map each instance address to its record ("site"), if logged. */
	count = *countp;
	for (numSites = 0, idx = 0; idx < count; idx++) {
		element = instances[idx];

		/* Already converted to a site pointer: leave as-is. */
		if (kInstanceFlagReferenced & element) {
			continue;
		}
		element = INSTANCE_PUT(element) & ~kInstanceFlags;

		site = 0;
		hashidx = calculate_hashidx_for_element(element, btlog);
		hashelem = btlog->elem_linkage_un.elem_recindex_hashtbl[hashidx];
		while (hashelem != NULL) {
			/* Stored addresses are complemented (~). */
			if (~hashelem->elem == element) {
				break;
			}
			hashelem = TAILQ_NEXT(hashelem, element_hash_link);
		}
		if (hashelem) {
			recindex = hashelem->recindex;
			site = (uintptr_t) lookup_btrecord(btlog, recindex);
		}
		if (site) {
			element = (site | kInstanceFlagReferenced);
		}
		instances[numSites] = INSTANCE_PUT(element);
		numSites++;
	}

	/* Pass 2: count duplicates of each unique site and report its backtrace. */
	for (idx = 0; idx < numSites; idx++) {
		site = instances[idx];
		if (!site) {
			continue;
		}
		if (!(kInstanceFlagReferenced & site)) {
			continue;
		}
		for (siteCount = 1, dups = (idx + 1); dups < numSites; dups++) {
			if (instances[dups] == site) {
				siteCount++;
				instances[dups] = 0; /* consumed by this site's count */
			}
		}
		record = (typeof(record))(INSTANCE_PUT(site) & ~kInstanceFlags);
		(*proc)(refCon, siteCount, zoneSize, (uintptr_t *) &record->bt[0], (uint32_t) btlog->btrecord_btdepth);
	}

	*countp = numSites;

	btlog_unlock(btlog);
}
861 
862 /*
863  * Returns the number of records in the btlog struct.
864  *
865  * Called by the mach_zone_get_btlog_records() MIG routine.
866  */
867 size_t
get_btlog_records_count(btlog_t * btlog)868 get_btlog_records_count(btlog_t *btlog)
869 {
870 	if (btlog->btlog_buffersize < sizeof(btlog_t)) {
871 		return 0;
872 	}
873 	return (btlog->btlog_buffersize - sizeof(btlog_t)) / btlog->btrecord_size;
874 }
875 
876 /*
877  * Copies out relevant info from btlog_record_t's to zone_btrecord_t's. 'numrecs' points to the number of records
878  * the 'records' buffer can hold. Upon return 'numrecs' points to the number of records actually copied out.
879  *
880  * Called by the mach_zone_get_btlog_records() MIG routine.
881  */
882 void
get_btlog_records(btlog_t * btlog,zone_btrecord_t * records,unsigned int * numrecs)883 get_btlog_records(btlog_t *btlog, zone_btrecord_t *records, unsigned int *numrecs)
884 {
885 	unsigned int count, recs_copied, frame;
886 	zone_btrecord_t *current_rec;
887 	btlog_record_t *zstack_record;
888 	btlog_recordindex_t     zstack_index = BTLOG_RECORDINDEX_NONE;
889 
890 	btlog_lock(btlog);
891 
892 	count = 0;
893 	if (btlog->btlog_buffersize > sizeof(btlog_t)) {
894 		count = (unsigned int)((btlog->btlog_buffersize - sizeof(btlog_t)) / btlog->btrecord_size);
895 	}
896 	/* Copy out only as many records as the pre-allocated buffer size permits. */
897 	if (count > *numrecs) {
898 		count = *numrecs;
899 	}
900 	zstack_index = btlog->head;
901 
902 	current_rec = &records[0];
903 	recs_copied = 0;
904 	while (recs_copied < count && (zstack_index != BTLOG_RECORDINDEX_NONE)) {
905 		zstack_record = lookup_btrecord(btlog, zstack_index);
906 		current_rec->operation_type = (uint32_t)(zstack_record->operation);
907 		current_rec->ref_count = zstack_record->ref_count;
908 
909 		frame = 0;
910 		while (frame < MIN(btlog->btrecord_btdepth, MAX_ZTRACE_DEPTH)) {
911 			current_rec->bt[frame] = (uint64_t)VM_KERNEL_UNSLIDE(zstack_record->bt[frame]);
912 			frame++;
913 		}
914 
915 		zstack_index = zstack_record->next;
916 		recs_copied++;
917 		current_rec++;
918 	}
919 	*numrecs = recs_copied;
920 
921 	btlog_unlock(btlog);
922 }
923 
924 #endif  /* DEBUG || DEVELOPMENT */
925