xref: /xnu-8792.41.9/osfmk/vm/vm_phantom_cache.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <vm/vm_page.h>
30 #include <vm/vm_object.h>
31 #include <vm/vm_kern.h>
32 #include <vm/vm_pageout.h>
33 #include <vm/vm_phantom_cache.h>
34 #include <vm/vm_compressor.h>
35 
36 
37 uint32_t phantom_cache_eval_period_in_msecs = 250;
38 uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
39 #if !XNU_TARGET_OS_OSX
40 uint32_t phantom_cache_thrashing_threshold = 500;
41 #else /* !XNU_TARGET_OS_OSX */
42 uint32_t phantom_cache_thrashing_threshold = 50;
43 #endif /* !XNU_TARGET_OS_OSX */
44 
45 /*
46  * Number of consecutive thrashing periods required before
47  * vm_phantom_cache_check_pressure() returns true.
48  */
49 #if !XNU_TARGET_OS_OSX
50 unsigned phantom_cache_contiguous_periods = 4;
51 #else /* !XNU_TARGET_OS_OSX */
52 unsigned phantom_cache_contiguous_periods = 2;
53 #endif /* !XNU_TARGET_OS_OSX */
54 
55 clock_sec_t     pc_start_of_eval_period_sec = 0;
56 clock_nsec_t    pc_start_of_eval_period_nsec = 0;
57 boolean_t       pc_need_eval_reset = FALSE;
58 
59 /* One bit per recent sampling period. Bit 0 = current period. */
60 uint32_t        pc_history = 0;
61 
62 uint32_t        sample_period_ghost_added_count = 0;
63 uint32_t        sample_period_ghost_added_count_ssd = 0;
64 uint32_t        sample_period_ghost_found_count = 0;
65 uint32_t        sample_period_ghost_found_count_ssd = 0;
66 
67 uint32_t        vm_phantom_object_id = 1;
68 #define         VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
69 
70 vm_ghost_t      vm_phantom_cache;
71 uint32_t        vm_phantom_cache_nindx = 1;
72 uint32_t        vm_phantom_cache_num_entries = 0;
73 uint32_t        vm_phantom_cache_size;
74 
75 typedef uint32_t        vm_phantom_hash_entry_t;
76 vm_phantom_hash_entry_t *vm_phantom_cache_hash;
77 uint32_t        vm_phantom_cache_hash_size;
78 uint32_t        vm_ghost_hash_mask;             /* Mask for hash function */
79 uint32_t        vm_ghost_bucket_hash;           /* Basic bucket hash */
80 
81 
82 int pg_masks[4] = {
83 	0x1, 0x2, 0x4, 0x8
84 };
85 
86 
/*
 * Map an (object id, entry offset) pair to a bucket index in
 * vm_phantom_cache_hash.  Arguments are parenthesized so that compound
 * expressions passed by a caller are evaluated as a unit.
 */
#define vm_phantom_hash(obj_id, offset) (\
	        ( (natural_t)((uintptr_t)(obj_id) * vm_ghost_bucket_hash) + ((offset) ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
89 
90 
/* Event counters for the phantom cache; only ever incremented in this file. */
struct phantom_cache_stats {
	uint32_t        pcs_wrapped;                    /* ring index wrapped back to 1 */
	uint32_t        pcs_added_page_to_entry;        /* page bit set in an existing entry */
	uint32_t        pcs_added_new_entry;            /* previously unused ring slot filled */
	uint32_t        pcs_replaced_entry;             /* in-use ring slot evicted and reused */

	uint32_t        pcs_lookup_found_page_in_cache; /* lookup returned an entry */
	uint32_t        pcs_lookup_entry_not_in_cache;  /* no entry for (object, offset) */
	uint32_t        pcs_lookup_page_not_in_entry;   /* entry exists but page bit is clear */

	uint32_t        pcs_updated_phantom_state;      /* page bit cleared by vm_phantom_cache_update() */
} phantom_cache_stats;
103 
104 
105 
106 void
vm_phantom_cache_init(void)107 vm_phantom_cache_init(void)
108 {
109 	unsigned int    num_entries;
110 	unsigned int    log1;
111 	unsigned int    size;
112 
113 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
114 		return;
115 	}
116 #if !XNU_TARGET_OS_OSX
117 	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
118 #else /* !XNU_TARGET_OS_OSX */
119 	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
120 #endif /* !XNU_TARGET_OS_OSX */
121 	vm_phantom_cache_num_entries = 1;
122 
123 	while (vm_phantom_cache_num_entries < num_entries) {
124 		vm_phantom_cache_num_entries <<= 1;
125 	}
126 
127 	/*
128 	 * We index this with g_next_index, so don't exceed the width of that bitfield.
129 	 */
130 	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
131 		vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
132 	}
133 
134 	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
135 	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
136 
137 	kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache,
138 	    vm_phantom_cache_size,
139 	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
140 	    VM_KERN_MEMORY_PHANTOM_CACHE);
141 
142 	kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache_hash,
143 	    vm_phantom_cache_hash_size,
144 	    KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
145 	    VM_KERN_MEMORY_PHANTOM_CACHE);
146 
147 	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;
148 
149 	/*
150 	 *	Calculate object_id shift value for hashing algorithm:
151 	 *		O = log2(sizeof(struct vm_object))
152 	 *		B = log2(vm_page_bucket_count)
153 	 *	        hash shifts the object_id left by
154 	 *		B/2 - O
155 	 */
156 	size = vm_phantom_cache_num_entries;
157 	for (log1 = 0; size > 1; log1++) {
158 		size /= 2;
159 	}
160 
161 	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);          /* Get (ceiling of sqrt of table size) */
162 	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);         /* Get (ceiling of quadroot of table size) */
163 	vm_ghost_bucket_hash |= 1;                              /* Set bit and add 1 - always must be 1 to insure unique series */
164 
165 	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
166 		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
167 	}
168 }
169 
170 
171 void
vm_phantom_cache_add_ghost(vm_page_t m)172 vm_phantom_cache_add_ghost(vm_page_t m)
173 {
174 	vm_ghost_t      vpce;
175 	vm_object_t     object;
176 	int             ghost_index;
177 	int             pg_mask;
178 	boolean_t       isSSD = FALSE;
179 	vm_phantom_hash_entry_t ghost_hash_index;
180 
181 	object = VM_PAGE_OBJECT(m);
182 
183 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
184 	vm_object_lock_assert_exclusive(object);
185 
186 	if (vm_phantom_cache_num_entries == 0) {
187 		return;
188 	}
189 
190 	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
191 
192 	if (object->phantom_object_id == 0) {
193 		vnode_pager_get_isSSD(object->pager, &isSSD);
194 
195 		if (isSSD == TRUE) {
196 			object->phantom_isssd = TRUE;
197 		}
198 
199 		object->phantom_object_id = vm_phantom_object_id++;
200 
201 		if (vm_phantom_object_id == 0) {
202 			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
203 		}
204 	} else {
205 		if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
206 			vpce->g_pages_held |= pg_mask;
207 
208 			phantom_cache_stats.pcs_added_page_to_entry++;
209 			goto done;
210 		}
211 	}
212 	/*
213 	 * if we're here then the vm_ghost_t of this vm_page_t
214 	 * is not present in the phantom cache... take the next
215 	 * available entry in the LRU first evicting the existing
216 	 * entry if we've wrapped the ring
217 	 */
218 	ghost_index = vm_phantom_cache_nindx++;
219 
220 	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
221 		vm_phantom_cache_nindx = 1;
222 
223 		phantom_cache_stats.pcs_wrapped++;
224 	}
225 	vpce = &vm_phantom_cache[ghost_index];
226 
227 	if (vpce->g_obj_id) {
228 		/*
229 		 * we're going to replace an existing entry
230 		 * so first remove it from the hash
231 		 */
232 		vm_ghost_t      nvpce;
233 
234 		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
235 
236 		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];
237 
238 		if (nvpce == vpce) {
239 			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
240 		} else {
241 			for (;;) {
242 				if (nvpce->g_next_index == 0) {
243 					panic("didn't find ghost in hash");
244 				}
245 
246 				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
247 					nvpce->g_next_index = vpce->g_next_index;
248 					break;
249 				}
250 				nvpce = &vm_phantom_cache[nvpce->g_next_index];
251 			}
252 		}
253 		phantom_cache_stats.pcs_replaced_entry++;
254 	} else {
255 		phantom_cache_stats.pcs_added_new_entry++;
256 	}
257 
258 	vpce->g_pages_held = pg_mask;
259 	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
260 	vpce->g_obj_id = object->phantom_object_id;
261 
262 	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
263 	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
264 	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;
265 
266 done:
267 	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;
268 
269 	if (object->phantom_isssd) {
270 		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
271 	} else {
272 		OSAddAtomic(1, &sample_period_ghost_added_count);
273 	}
274 }
275 
276 
277 vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m,uint32_t pg_mask)278 vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
279 {
280 	uint64_t        g_obj_offset;
281 	uint32_t        g_obj_id;
282 	uint32_t        ghost_index;
283 	vm_object_t     object;
284 
285 	object = VM_PAGE_OBJECT(m);
286 
287 	if ((g_obj_id = object->phantom_object_id) == 0) {
288 		/*
289 		 * no entries in phantom cache for this object
290 		 */
291 		return NULL;
292 	}
293 	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
294 
295 	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];
296 
297 	while (ghost_index) {
298 		vm_ghost_t      vpce;
299 
300 		vpce = &vm_phantom_cache[ghost_index];
301 
302 		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
303 			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
304 				phantom_cache_stats.pcs_lookup_found_page_in_cache++;
305 
306 				return vpce;
307 			}
308 			phantom_cache_stats.pcs_lookup_page_not_in_entry++;
309 
310 			return NULL;
311 		}
312 		ghost_index = vpce->g_next_index;
313 	}
314 	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;
315 
316 	return NULL;
317 }
318 
319 
320 
321 void
vm_phantom_cache_update(vm_page_t m)322 vm_phantom_cache_update(vm_page_t m)
323 {
324 	int             pg_mask;
325 	vm_ghost_t      vpce;
326 	vm_object_t     object;
327 
328 	object = VM_PAGE_OBJECT(m);
329 
330 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
331 	vm_object_lock_assert_exclusive(object);
332 
333 	if (vm_phantom_cache_num_entries == 0) {
334 		return;
335 	}
336 
337 	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
338 
339 	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
340 		vpce->g_pages_held &= ~pg_mask;
341 
342 		phantom_cache_stats.pcs_updated_phantom_state++;
343 		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;
344 
345 		if (object->phantom_isssd) {
346 			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
347 		} else {
348 			OSAddAtomic(1, &sample_period_ghost_found_count);
349 		}
350 	}
351 }
352 
353 
354 #define PHANTOM_CACHE_DEBUG     1
355 
356 #if     PHANTOM_CACHE_DEBUG
357 
358 int     sample_period_ghost_counts_indx = 0;
359 
360 struct {
361 	uint32_t        added;
362 	uint32_t        found;
363 	uint32_t        added_ssd;
364 	uint32_t        found_ssd;
365 	uint32_t        elapsed_ms;
366 	boolean_t       pressure_detected;
367 } sample_period_ghost_counts[256];
368 
369 #endif
370 
371 /*
372  * Determine if the file cache is thrashing from sampling interval statistics.
373  *
374  * Pages added to the phantom cache = pages evicted from the file cache.
375  * Pages found in the phantom cache = reads of pages that were recently evicted.
376  * Threshold is the latency-dependent number of reads we consider thrashing.
377  */
378 static boolean_t
is_thrashing(uint32_t added,uint32_t found,uint32_t threshold)379 is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
380 {
381 	/* Ignore normal activity below the threshold. */
382 	if (added < threshold || found < threshold) {
383 		return FALSE;
384 	}
385 
386 	/*
387 	 * When thrashing in a way that we can mitigate, most of the pages read
388 	 * into the file cache were recently evicted, and 'found' will be close
389 	 * to 'added'.
390 	 *
391 	 * When replacing the current working set because a new app is
392 	 * launched, we see very high read traffic with sporadic phantom cache
393 	 * hits.
394 	 *
395 	 * This is not thrashing, or freeing up memory wouldn't help much
396 	 * anyway.
397 	 */
398 	if (found < added / 2) {
399 		return FALSE;
400 	}
401 
402 	return TRUE;
403 }
404 
405 /*
406  * the following function is never called
407  * from multiple threads simultaneously due
408  * to a condition variable used to serialize
409  * at the compressor level... thus no need
410  * to provide locking for the sample processing
411  */
412 boolean_t
vm_phantom_cache_check_pressure()413 vm_phantom_cache_check_pressure()
414 {
415 	clock_sec_t     cur_ts_sec;
416 	clock_nsec_t    cur_ts_nsec;
417 	uint64_t        elapsed_msecs_in_eval;
418 	boolean_t       pressure_detected = FALSE;
419 
420 	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
421 
422 	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);
423 
424 	/*
425 	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
426 	 * whenever vm_phantom_cache_restart_sample has been called.
427 	 */
428 	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
429 		pc_need_eval_reset = TRUE;
430 	}
431 
432 	if (pc_need_eval_reset == TRUE) {
433 #if PHANTOM_CACHE_DEBUG
434 		/*
435 		 * maintain some info about the last 256 sample periods
436 		 */
437 		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
438 		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
439 		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
440 		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
441 		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;
442 
443 		sample_period_ghost_counts_indx++;
444 
445 		if (sample_period_ghost_counts_indx >= 256) {
446 			sample_period_ghost_counts_indx = 0;
447 		}
448 #endif
449 		sample_period_ghost_added_count = 0;
450 		sample_period_ghost_found_count = 0;
451 		sample_period_ghost_added_count_ssd = 0;
452 		sample_period_ghost_found_count_ssd = 0;
453 
454 		pc_start_of_eval_period_sec = cur_ts_sec;
455 		pc_start_of_eval_period_nsec = cur_ts_nsec;
456 		pc_history <<= 1;
457 		pc_need_eval_reset = FALSE;
458 	} else {
459 		/*
460 		 * Since the trashing rate is really a function of the read latency of the disk
461 		 * we have to consider both the SSD and spinning disk case since the file cache
462 		 * could be backed by either or even both flavors.  When the object is first
463 		 * assigned a phantom_object_id, we query the pager to determine if the backing
464 		 * backing media is an SSD and remember that answer in the vm_object.  We use
465 		 * that info to maintains counts for both the SSD and spinning disk cases.
466 		 */
467 		if (is_thrashing(sample_period_ghost_added_count,
468 		    sample_period_ghost_found_count,
469 		    phantom_cache_thrashing_threshold) ||
470 		    is_thrashing(sample_period_ghost_added_count_ssd,
471 		    sample_period_ghost_found_count_ssd,
472 		    phantom_cache_thrashing_threshold_ssd)) {
473 			/* Thrashing in the current period: Set bit 0. */
474 			pc_history |= 1;
475 		}
476 	}
477 
478 	/*
479 	 * Declare pressure_detected after phantom_cache_contiguous_periods.
480 	 *
481 	 * Create a bitmask with the N low bits set. These bits must all be set
482 	 * in pc_history. The high bits of pc_history are ignored.
483 	 */
484 	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
485 	if ((pc_history & bitmask) == bitmask) {
486 		pressure_detected = TRUE;
487 	}
488 
489 	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
490 		pressure_detected = FALSE;
491 	}
492 
493 #if PHANTOM_CACHE_DEBUG
494 	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
495 #endif
496 	return pressure_detected;
497 }
498 
499 /*
500  * Restart the current sampling because conditions have changed significantly,
501  * and we don't want to react to old data.
502  *
503  * This function can be called from any thread.
504  */
505 void
vm_phantom_cache_restart_sample(void)506 vm_phantom_cache_restart_sample(void)
507 {
508 	pc_need_eval_reset = TRUE;
509 }
510