/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>


uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if !XNU_TARGET_OS_OSX
uint32_t phantom_cache_thrashing_threshold = 500;
#else /* !XNU_TARGET_OS_OSX */
uint32_t phantom_cache_thrashing_threshold = 50;
#endif /* !XNU_TARGET_OS_OSX */

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if !XNU_TARGET_OS_OSX
unsigned phantom_cache_contiguous_periods = 4;
#else /* !XNU_TARGET_OS_OSX */
unsigned phantom_cache_contiguous_periods = 2;
#endif /* !XNU_TARGET_OS_OSX */

clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
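/*
 * When vm_phantom_object_id wraps past zero it restarts here rather than
 * at 1, presumably so ids handed out early on (and likely still recorded
 * in long-lived objects) are not immediately reissued.
 */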

vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;
uint32_t vm_phantom_cache_num_entries = 0;
uint32_t vm_phantom_cache_size;
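/*
 * Entry 0 of vm_phantom_cache is reserved: a g_next_index of 0 (or an
 * empty hash bucket) terminates a hash chain, which is why the ring
 * index starts at, and wraps back to, 1.
 */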

typedef uint32_t vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;            /* Mask for hash function */
uint32_t vm_ghost_bucket_hash;          /* Basic bucket hash */


int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};
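
/*
 * Each vm_ghost_t tracks a small group of consecutive pages, one bit of
 * g_pages_held per page; pg_masks[] selects the bit for a page's slot
 * within its group (four slots here, matching VM_GHOST_PAGES_PER_ENTRY).
 */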


#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
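
/*
 * Worked example (illustrative values only): with a 64K-entry table,
 * vm_ghost_bucket_hash == 0x111 and vm_ghost_hash_mask == 0xFFFF, so
 *
 *   vm_phantom_hash(5, 3) == ((5 * 0x111) + (3 ^ 0x111)) & 0xFFFF
 *                         == (0x555 + 0x112) & 0xFFFF
 *                         == 0x667
 */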


struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;



void
vm_phantom_cache_init(void)
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		return;
	}
#if !XNU_TARGET_OS_OSX
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else /* !XNU_TARGET_OS_OSX */
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif /* !XNU_TARGET_OS_OSX */
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries) {
		vm_phantom_cache_num_entries <<= 1;
	}

	/*
	 * We index this with g_next_index, so don't exceed the width of that bitfield.
	 */
	if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
		vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
	}

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

	kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache,
	    vm_phantom_cache_size,
	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
	    VM_KERN_MEMORY_PHANTOM_CACHE);

	kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache_hash,
	    vm_phantom_cache_hash_size,
	    KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
	    VM_KERN_MEMORY_PHANTOM_CACHE);

	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Derive the multiplier for the hashing algorithm from the table
	 * size: with log1 = log2(vm_phantom_cache_num_entries), combine
	 * (roughly) the square root and the fourth root of the table size,
	 * keeping the low bit set.
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++) {
		size /= 2;
	}

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);  /* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;                      /* Low bit must always be 1 to ensure a unique series */
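
	/*
	 * Example (a sketch, not a guaranteed configuration): for a
	 * 64K-entry table, log1 == 16, so
	 *   vm_ghost_bucket_hash = (1 << 8) | (1 << 4) | 1 = 0x111
	 * i.e. roughly sqrt(64K) | quadroot(64K) | 1.
	 */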

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
	}
}


void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t vpce;
	vm_object_t object;
	int ghost_index;
	int pg_mask;
	boolean_t isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {
		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE) {
			object->phantom_isssd = TRUE;
		}

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0) {
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
		}
	} else {
		if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * If we're here, the vm_ghost_t for this vm_page_t is not present
	 * in the phantom cache... take the next available entry in the LRU,
	 * first evicting the existing entry if we've wrapped the ring.
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0) {
					panic("didn't find ghost in hash");
				}

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else {
		phantom_cache_stats.pcs_added_new_entry++;
	}

	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

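	/* Link the new entry in at the head of its hash chain. */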
	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;

	if (object->phantom_isssd) {
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	} else {
		OSAddAtomic(1, &sample_period_ghost_added_count);
	}
}


vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t g_obj_offset;
	uint32_t g_obj_id;
	uint32_t ghost_index;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return NULL;
	}
	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vm_ghost_t vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return vpce;
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return NULL;
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return NULL;
}



void
vm_phantom_cache_update(vm_page_t m)
{
	int pg_mask;
	vm_ghost_t vpce;
	vm_object_t object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0) {
		return;
	}

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;
		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;

		if (object->phantom_isssd) {
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		} else {
			OSAddAtomic(1, &sample_period_ghost_found_count);
		}
	}
}


#define PHANTOM_CACHE_DEBUG 1

#if PHANTOM_CACHE_DEBUG

int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold) {
		return FALSE;
	}

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2) {
		return FALSE;
	}

	return TRUE;
}
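
/*
 * Example with the default SSD threshold of 1000: added == 4000 and
 * found == 2500 counts as thrashing (found >= added / 2), while
 * added == 4000 and found == 1200 is treated as working-set
 * replacement rather than thrashing.
 */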

/*
 * This function is never called from multiple threads simultaneously,
 * due to a condition variable used to serialize at the compressor
 * level... thus there is no need to provide locking for the sample
 * processing.
 */
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t cur_ts_sec;
	clock_nsec_t cur_ts_nsec;
	uint64_t elapsed_msecs_in_eval;
	boolean_t pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {
#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256) {
			sample_period_ghost_counts_indx = 0;
		}
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read latency
		 * of the disk, we have to consider both the SSD and spinning disk
		 * case since the file cache could be backed by either or even both
		 * flavors. When the object is first assigned a phantom_object_id,
		 * we query the pager to determine if the backing media is an SSD
		 * and remember that answer in the vm_object. We use that info to
		 * maintain counts for both the SSD and spinning disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
		    sample_period_ghost_found_count,
		    phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
		    sample_period_ghost_found_count_ssd,
		    phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
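	/*
	 * For example, with phantom_cache_contiguous_periods == 2 the
	 * bitmask is 0x3, so pressure is declared only when the two most
	 * recent sample periods were both flagged as thrashing.
	 */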
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask) {
		pressure_detected = TRUE;
	}

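	/*
	 * Never declare pressure while file-backed (external) pages still
	 * make up more than half of available memory; in that case the
	 * file cache is not being squeezed.
	 */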
	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
		pressure_detected = FALSE;
	}

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return pressure_detected;
}

/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}