1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <vm/vm_page_internal.h>
30 #include <vm/vm_object_internal.h>
31 #include <vm/vm_kern_xnu.h>
32 #include <vm/vm_pageout_xnu.h>
33 #include <vm/vm_phantom_cache_internal.h>
34 #include <vm/vm_compressor_internal.h>
35 #include <vm/vm_protos_internal.h>
36
37
38 uint32_t phantom_cache_eval_period_in_msecs = 250;
39 uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
40 #if !XNU_TARGET_OS_OSX
41 uint32_t phantom_cache_thrashing_threshold = 500;
42 #else /* !XNU_TARGET_OS_OSX */
43 uint32_t phantom_cache_thrashing_threshold = 50;
44 #endif /* !XNU_TARGET_OS_OSX */
45
46 /*
47 * Number of consecutive thrashing periods required before
48 * vm_phantom_cache_check_pressure() returns true.
49 */
50 #if !XNU_TARGET_OS_OSX
51 unsigned phantom_cache_contiguous_periods = 4;
52 #else /* !XNU_TARGET_OS_OSX */
53 unsigned phantom_cache_contiguous_periods = 2;
54 #endif /* !XNU_TARGET_OS_OSX */
55
56 clock_sec_t pc_start_of_eval_period_sec = 0;
57 clock_nsec_t pc_start_of_eval_period_nsec = 0;
58 boolean_t pc_need_eval_reset = FALSE;
59
60 /* One bit per recent sampling period. Bit 0 = current period. */
61 uint32_t pc_history = 0;
62
63 uint32_t sample_period_ghost_added_count = 0;
64 uint32_t sample_period_ghost_added_count_ssd = 0;
65 uint32_t sample_period_ghost_found_count = 0;
66 uint32_t sample_period_ghost_found_count_ssd = 0;
67
68 uint32_t vm_phantom_object_id = 1;
69 #define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
70
71 vm_ghost_t vm_phantom_cache;
72 uint32_t vm_phantom_cache_nindx = 1;
73 uint32_t vm_phantom_cache_num_entries = 0;
74 uint32_t vm_phantom_cache_size;
75
76 typedef uint32_t vm_phantom_hash_entry_t;
77 vm_phantom_hash_entry_t *vm_phantom_cache_hash;
78 uint32_t vm_phantom_cache_hash_size;
79 uint32_t vm_ghost_hash_mask; /* Mask for hash function */
80 uint32_t vm_ghost_bucket_hash; /* Basic bucket hash */
81
82
83 int pg_masks[4] = {
84 0x1, 0x2, 0x4, 0x8
85 };
86
87
88 #define vm_phantom_hash(obj_id, offset) (\
89 ( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)
90
91
92 struct phantom_cache_stats {
93 uint32_t pcs_wrapped;
94 uint32_t pcs_added_page_to_entry;
95 uint32_t pcs_added_new_entry;
96 uint32_t pcs_replaced_entry;
97
98 uint32_t pcs_lookup_found_page_in_cache;
99 uint32_t pcs_lookup_entry_not_in_cache;
100 uint32_t pcs_lookup_page_not_in_entry;
101
102 uint32_t pcs_updated_phantom_state;
103 } phantom_cache_stats;
104
105
106
107 void
vm_phantom_cache_init(void)108 vm_phantom_cache_init(void)
109 {
110 unsigned int num_entries;
111 unsigned int log1;
112 unsigned int size;
113
114 if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
115 return;
116 }
117 #if !XNU_TARGET_OS_OSX
118 num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
119 #else /* !XNU_TARGET_OS_OSX */
120 num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
121 #endif /* !XNU_TARGET_OS_OSX */
122 vm_phantom_cache_num_entries = 1;
123
124 while (vm_phantom_cache_num_entries < num_entries) {
125 vm_phantom_cache_num_entries <<= 1;
126 }
127
128 /*
129 * We index this with g_next_index, so don't exceed the width of that bitfield.
130 */
131 if (vm_phantom_cache_num_entries > (1 << VM_GHOST_INDEX_BITS)) {
132 vm_phantom_cache_num_entries = (1 << VM_GHOST_INDEX_BITS);
133 }
134
135 vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
136 vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;
137
138 kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache,
139 vm_phantom_cache_size,
140 KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
141 VM_KERN_MEMORY_PHANTOM_CACHE);
142
143 kmem_alloc(kernel_map, (vm_offset_t *)&vm_phantom_cache_hash,
144 vm_phantom_cache_hash_size,
145 KMA_NOFAIL | KMA_KOBJECT | KMA_ZERO | KMA_PERMANENT,
146 VM_KERN_MEMORY_PHANTOM_CACHE);
147
148 vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;
149
150 /*
151 * Calculate object_id shift value for hashing algorithm:
152 * O = log2(sizeof(struct vm_object))
153 * B = log2(vm_page_bucket_count)
154 * hash shifts the object_id left by
155 * B/2 - O
156 */
157 size = vm_phantom_cache_num_entries;
158 for (log1 = 0; size > 1; log1++) {
159 size /= 2;
160 }
161
162 vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
163 vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
164 vm_ghost_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
165
166 if (vm_ghost_hash_mask & vm_phantom_cache_num_entries) {
167 printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
168 }
169 }
170
171
172 void
vm_phantom_cache_add_ghost(vm_page_t m)173 vm_phantom_cache_add_ghost(vm_page_t m)
174 {
175 vm_ghost_t vpce;
176 vm_object_t object;
177 int ghost_index;
178 int pg_mask;
179 boolean_t isSSD = FALSE;
180 vm_phantom_hash_entry_t ghost_hash_index;
181
182 object = VM_PAGE_OBJECT(m);
183
184 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
185 vm_object_lock_assert_exclusive(object);
186
187 if (vm_phantom_cache_num_entries == 0) {
188 return;
189 }
190 if (object->pager == MEMORY_OBJECT_NULL) {
191 /*
192 * This object must have lost its memory object due to a force-unmount
193 * or ungraft, for example; this page won't come back, so no need to
194 * track it.
195 */
196 return;
197 }
198
199 pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
200
201 if (object->phantom_object_id == 0) {
202 vnode_pager_get_isSSD(object->pager, &isSSD);
203
204 if (isSSD == TRUE) {
205 object->phantom_isssd = TRUE;
206 }
207
208 object->phantom_object_id = vm_phantom_object_id++;
209
210 if (vm_phantom_object_id == 0) {
211 vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
212 }
213 } else {
214 if ((vpce = vm_phantom_cache_lookup_ghost(m, 0))) {
215 vpce->g_pages_held |= pg_mask;
216
217 phantom_cache_stats.pcs_added_page_to_entry++;
218 goto done;
219 }
220 }
221 /*
222 * if we're here then the vm_ghost_t of this vm_page_t
223 * is not present in the phantom cache... take the next
224 * available entry in the LRU first evicting the existing
225 * entry if we've wrapped the ring
226 */
227 ghost_index = vm_phantom_cache_nindx++;
228
229 if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
230 vm_phantom_cache_nindx = 1;
231
232 phantom_cache_stats.pcs_wrapped++;
233 }
234 vpce = &vm_phantom_cache[ghost_index];
235
236 if (vpce->g_obj_id) {
237 /*
238 * we're going to replace an existing entry
239 * so first remove it from the hash
240 */
241 vm_ghost_t nvpce;
242
243 ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
244
245 nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];
246
247 if (nvpce == vpce) {
248 vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
249 } else {
250 for (;;) {
251 if (nvpce->g_next_index == 0) {
252 panic("didn't find ghost in hash");
253 }
254
255 if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
256 nvpce->g_next_index = vpce->g_next_index;
257 break;
258 }
259 nvpce = &vm_phantom_cache[nvpce->g_next_index];
260 }
261 }
262 phantom_cache_stats.pcs_replaced_entry++;
263 } else {
264 phantom_cache_stats.pcs_added_new_entry++;
265 }
266
267 vpce->g_pages_held = pg_mask;
268 vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
269 vpce->g_obj_id = object->phantom_object_id;
270
271 ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
272 vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
273 vm_phantom_cache_hash[ghost_hash_index] = ghost_index;
274
275 done:
276 vm_pageout_vminfo.vm_phantom_cache_added_ghost++;
277
278 if (object->phantom_isssd) {
279 OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
280 } else {
281 OSAddAtomic(1, &sample_period_ghost_added_count);
282 }
283 }
284
285
286 vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m,uint32_t pg_mask)287 vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
288 {
289 uint64_t g_obj_offset;
290 uint32_t g_obj_id;
291 uint32_t ghost_index;
292 vm_object_t object;
293
294 object = VM_PAGE_OBJECT(m);
295
296 if ((g_obj_id = object->phantom_object_id) == 0) {
297 /*
298 * no entries in phantom cache for this object
299 */
300 return NULL;
301 }
302 g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
303
304 ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];
305
306 while (ghost_index) {
307 vm_ghost_t vpce;
308
309 vpce = &vm_phantom_cache[ghost_index];
310
311 if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {
312 if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
313 phantom_cache_stats.pcs_lookup_found_page_in_cache++;
314
315 return vpce;
316 }
317 phantom_cache_stats.pcs_lookup_page_not_in_entry++;
318
319 return NULL;
320 }
321 ghost_index = vpce->g_next_index;
322 }
323 phantom_cache_stats.pcs_lookup_entry_not_in_cache++;
324
325 return NULL;
326 }
327
328
329
330 void
vm_phantom_cache_update(vm_page_t m)331 vm_phantom_cache_update(vm_page_t m)
332 {
333 int pg_mask;
334 vm_ghost_t vpce;
335 vm_object_t object;
336
337 object = VM_PAGE_OBJECT(m);
338
339 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
340 vm_object_lock_assert_exclusive(object);
341
342 if (vm_phantom_cache_num_entries == 0) {
343 return;
344 }
345
346 pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];
347
348 if ((vpce = vm_phantom_cache_lookup_ghost(m, pg_mask))) {
349 vpce->g_pages_held &= ~pg_mask;
350
351 phantom_cache_stats.pcs_updated_phantom_state++;
352 vm_pageout_vminfo.vm_phantom_cache_found_ghost++;
353
354 if (object->phantom_isssd) {
355 OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
356 } else {
357 OSAddAtomic(1, &sample_period_ghost_found_count);
358 }
359 }
360 }
361
362
363 #define PHANTOM_CACHE_DEBUG 1
364
365 #if PHANTOM_CACHE_DEBUG
366
367 int sample_period_ghost_counts_indx = 0;
368
369 struct {
370 uint32_t added;
371 uint32_t found;
372 uint32_t added_ssd;
373 uint32_t found_ssd;
374 uint32_t elapsed_ms;
375 boolean_t pressure_detected;
376 } sample_period_ghost_counts[256];
377
378 #endif
379
380 /*
381 * Determine if the file cache is thrashing from sampling interval statistics.
382 *
383 * Pages added to the phantom cache = pages evicted from the file cache.
384 * Pages found in the phantom cache = reads of pages that were recently evicted.
385 * Threshold is the latency-dependent number of reads we consider thrashing.
386 */
387 static boolean_t
is_thrashing(uint32_t added,uint32_t found,uint32_t threshold)388 is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
389 {
390 /* Ignore normal activity below the threshold. */
391 if (added < threshold || found < threshold) {
392 return FALSE;
393 }
394
395 /*
396 * When thrashing in a way that we can mitigate, most of the pages read
397 * into the file cache were recently evicted, and 'found' will be close
398 * to 'added'.
399 *
400 * When replacing the current working set because a new app is
401 * launched, we see very high read traffic with sporadic phantom cache
402 * hits.
403 *
404 * This is not thrashing, or freeing up memory wouldn't help much
405 * anyway.
406 */
407 if (found < added / 2) {
408 return FALSE;
409 }
410
411 return TRUE;
412 }
413
414 /*
415 * the following function is never called
416 * from multiple threads simultaneously due
417 * to a condition variable used to serialize
418 * at the compressor level... thus no need
419 * to provide locking for the sample processing
420 */
421 boolean_t
vm_phantom_cache_check_pressure()422 vm_phantom_cache_check_pressure()
423 {
424 clock_sec_t cur_ts_sec;
425 clock_nsec_t cur_ts_nsec;
426 uint64_t elapsed_msecs_in_eval;
427 boolean_t pressure_detected = FALSE;
428
429 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
430
431 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);
432
433 /*
434 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
435 * whenever vm_phantom_cache_restart_sample has been called.
436 */
437 if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
438 pc_need_eval_reset = TRUE;
439 }
440
441 if (pc_need_eval_reset == TRUE) {
442 #if PHANTOM_CACHE_DEBUG
443 /*
444 * maintain some info about the last 256 sample periods
445 */
446 sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
447 sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
448 sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
449 sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
450 sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;
451
452 sample_period_ghost_counts_indx++;
453
454 if (sample_period_ghost_counts_indx >= 256) {
455 sample_period_ghost_counts_indx = 0;
456 }
457 #endif
458 sample_period_ghost_added_count = 0;
459 sample_period_ghost_found_count = 0;
460 sample_period_ghost_added_count_ssd = 0;
461 sample_period_ghost_found_count_ssd = 0;
462
463 pc_start_of_eval_period_sec = cur_ts_sec;
464 pc_start_of_eval_period_nsec = cur_ts_nsec;
465 pc_history <<= 1;
466 pc_need_eval_reset = FALSE;
467 } else {
468 /*
469 * Since the trashing rate is really a function of the read latency of the disk
470 * we have to consider both the SSD and spinning disk case since the file cache
471 * could be backed by either or even both flavors. When the object is first
472 * assigned a phantom_object_id, we query the pager to determine if the backing
473 * backing media is an SSD and remember that answer in the vm_object. We use
474 * that info to maintains counts for both the SSD and spinning disk cases.
475 */
476 if (is_thrashing(sample_period_ghost_added_count,
477 sample_period_ghost_found_count,
478 phantom_cache_thrashing_threshold) ||
479 is_thrashing(sample_period_ghost_added_count_ssd,
480 sample_period_ghost_found_count_ssd,
481 phantom_cache_thrashing_threshold_ssd)) {
482 /* Thrashing in the current period: Set bit 0. */
483 pc_history |= 1;
484 }
485 }
486
487 /*
488 * Declare pressure_detected after phantom_cache_contiguous_periods.
489 *
490 * Create a bitmask with the N low bits set. These bits must all be set
491 * in pc_history. The high bits of pc_history are ignored.
492 */
493 uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
494 if ((pc_history & bitmask) == bitmask) {
495 pressure_detected = TRUE;
496 }
497
498 if (vm_page_pageable_external_count > ((AVAILABLE_MEMORY) * 50) / 100) {
499 pressure_detected = FALSE;
500 }
501
502 #if PHANTOM_CACHE_DEBUG
503 sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
504 #endif
505 return pressure_detected;
506 }
507
508 /*
509 * Restart the current sampling because conditions have changed significantly,
510 * and we don't want to react to old data.
511 *
512 * This function can be called from any thread.
513 */
514 void
vm_phantom_cache_restart_sample(void)515 vm_phantom_cache_restart_sample(void)
516 {
517 pc_need_eval_reset = TRUE;
518 }
519