/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/ledger.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>
#include <os/refcnt.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <sys/kdebug_triage.h>
/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

int
get_map_nentries(
	vm_map_t map)
{
	return map->hdr.nentries;
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	.memory_object_reference = vnode_pager_reference,
	.memory_object_deallocate = vnode_pager_deallocate,
	.memory_object_init = vnode_pager_init,
	.memory_object_terminate = vnode_pager_terminate,
	.memory_object_data_request = vnode_pager_data_request,
	.memory_object_data_return = vnode_pager_data_return,
	.memory_object_data_initialize = vnode_pager_data_initialize,
	.memory_object_map = vnode_pager_map,
	.memory_object_last_unmap = vnode_pager_last_unmap,
	.memory_object_backing_object = NULL,
	.memory_object_pager_name = "vnode pager"
};

typedef struct vnode_pager {
	/* mandatory generic header */
	struct memory_object vn_pgr_hdr;

	/* pager-specific */
#if MEMORY_OBJECT_HAS_REFCOUNT
#define vn_pgr_hdr_ref  vn_pgr_hdr.mo_ref
#else
	os_ref_atomic_t vn_pgr_hdr_ref;
#endif
	struct vnode    *vnode_handle;  /* vnode handle */
} *vnode_pager_t;
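
/*
 * vn_pgr_hdr is the first field of struct vnode_pager, so a
 * memory_object_t that is known to belong to this pager can be cast
 * straight back to a vnode_pager_t; vnode_pager_lookup() relies on this.
 */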


kern_return_t
vnode_pager_cluster_read(       /* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(      /* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(            /* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(             /* forward */
	memory_object_t);

struct vnode *
vnode_pager_lookup_vnode(       /* forward */
	memory_object_t);

ZONE_DEFINE_TYPE(vnode_pager_zone, "vnode pager structures",
    struct vnode_pager, ZC_NOENCRYPT);

#define VNODE_PAGER_NULL        ((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT   1
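
/*
 * A CLUSTER_SHIFT of 1 describes a two-page cluster: with 4KB pages
 * (PAGE_SHIFT == 12), (1 << (CLUSTER_SHIFT + PAGE_SHIFT)) is 8KB.
 * vnode_pager_init() currently advertises a single-page cluster instead
 * (see the commented-out cluster_size assignment there).
 */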


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL               0xffffffff
#define PAGER_INIT              0x00000001
#define PAGER_PAGEIN            0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
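
/*
 * Example (DEBUG kernels only): setting the global "pagerdebug" to
 * PAGER_ALL enables every trace site, while PAGER_PAGEIN enables only
 * sites like
 *	PAGER_DEBUG(PAGER_PAGEIN, ("pagein %p\n", mem_obj));
 * On non-DEBUG kernels the macro compiles away entirely.
 */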

extern int proc_resetpcontrol(int);


extern int uiomove64(addr64_t, int, void *);
#define MAX_RUN         32

int
memory_object_control_uiomove(
	memory_object_control_t control,
	memory_object_offset_t  offset,
	void                    *uio,
	int                     start_offset,
	int                     io_requested,
	int                     mark_dirty,
	int                     take_reference)
{
	vm_object_t     object;
	vm_page_t       dst_page;
	int             xsize;
	int             retval = 0;
	int             cur_run;
	int             cur_needed;
	int             i;
	int             orig_offset;
	vm_page_t       page_run[MAX_RUN];
	int             dirty_count;    /* keeps track of number of pages dirtied as part of this uiomove */

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return 0;
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	dirty_count = 0;
	while (io_requested && retval == 0) {
		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN) {
			cur_needed = MAX_RUN;
		}

		for (cur_run = 0; cur_run < cur_needed;) {
			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
				break;
			}

			if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now since we can't block on this
				 * page while holding other pages in the BUSY state;
				 * otherwise, wait for the page to become available
				 */
				if (cur_run) {
					break;
				}
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->vmp_laundry) {
				vm_pageout_steal_laundry(dst_page, FALSE);
			}

			if (mark_dirty) {
				if (dst_page->vmp_dirty == FALSE) {
					dirty_count++;
				}
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->vmp_cs_validated &&
				    !dst_page->vmp_cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->vmp_cs_validated = VMP_CS_ALL_FALSE;

					VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);

					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
				}
			}
			dst_page->vmp_busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0) {
			/*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
			break;
		}
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested) {
				xsize = io_requested;
			}

			if ((retval = uiomove64((addr64_t)(((addr64_t)(VM_PAGE_GET_PHYS_PAGE(dst_page)) << PAGE_SHIFT) + start_offset), xsize, uio))) {
				break;
			}

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {
			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++) {
				vm_page_lru(page_run[i]);
			}

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 */
			if (dst_page->vmp_clustered) {
				VM_PAGE_CONSUME_CLUSTERED(dst_page);
			}

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	if (object->pager) {
		task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_DEFERRED, vnode_pager_lookup_vnode(object->pager));
	}
	vm_object_unlock(object);
	return retval;
}
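
/*
 * Sketch of how the fast path above is typically driven (assumption:
 * this mirrors the BSD cluster layer's cached-I/O path; the names and
 * constants below are illustrative, not an API contract):
 *
 *	control = ubc_getobject(vp, UBC_FLAGS_NONE);
 *	retval = memory_object_control_uiomove(control,
 *	    uio_offset(uio) & ~PAGE_MASK_64,      // page-aligned base
 *	    uio,
 *	    (int)(uio_offset(uio) & PAGE_MASK),   // offset into first page
 *	    io_size,                              // bytes to move
 *	    is_write,                             // dirty the pages on a write
 *	    1);                                   // age pages via the LRU
 *
 * A zero return with bytes still pending in the uio means the fast path
 * bailed (hole in the cache, busy page, pending copy-on-write) and the
 * caller must finish through the normal cluster I/O machinery.
 */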


/*
 * vnode_pager_setup: create and return the pager (memory object)
 * backing the given vnode; the "pager" argument is unused.
 */
memory_object_t
vnode_pager_setup(
	struct vnode    *vp,
	__unused memory_object_t pager)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL) {
		panic("vnode_pager_setup: vnode_object_create() failed");
	}
	return (memory_object_t)vnode_object;
}

/*
 * vnode_pager_init: attach the pager to its memory object control
 * port and set the default caching attributes.
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
#if !DEBUG
    __unused
#endif
    memory_object_cluster_size_t pg_size)
{
	vnode_pager_t   vnode_object;
	kern_return_t   kr;
	memory_object_attr_info_data_t  attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->vn_pgr_hdr.mo_control = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
		control,
		MEMORY_OBJECT_ATTRIBUTE_INFO,
		(memory_object_info_t) &attributes,
		MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS) {
		panic("vnode_pager_init: memory_object_change_attributes() failed");
	}

	return KERN_SUCCESS;
}

/*
 * vnode_pager_data_return: write dirty pages back to the vnode
 * via the cluster-write path.
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	memory_object_cluster_size_t    data_cnt,
	memory_object_offset_t  *resid_offset,
	int                     *io_error,
	__unused boolean_t      dirty,
	__unused boolean_t      kernel_copy,
	int                     upl_flags)
{
	vnode_pager_t   vnode_object;

	assertf(page_aligned(offset), "offset 0x%llx\n", offset);

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t   data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

void
vnode_pager_dirtied(
	memory_object_t         mem_obj,
	vm_object_offset_t      s_offset,
	vm_object_offset_t      e_offset)
{
	vnode_pager_t   vnode_object;

	if (mem_obj && mem_obj->mo_pager_ops == &vnode_pager_ops) {
		vnode_object = vnode_pager_lookup(mem_obj);
		vnode_pager_was_dirtied(vnode_object->vnode_handle, s_offset, e_offset);
	}
}

kern_return_t
vnode_pager_get_isinuse(
	memory_object_t mem_obj,
	uint32_t        *isinuse)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t mem_obj,
	uint32_t        *limit)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t mem_obj,
	boolean_t       *isSSD)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t         mem_obj,
	memory_object_offset_t  *length)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
	memory_object_t mem_obj,
	char            *pathname,
	vm_size_t       pathname_len,
	char            *filename,
	vm_size_t       filename_len,
	boolean_t       *truncated_path_p)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
	           pathname,
	           pathname_len,
	           filename,
	           filename_len,
	           truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t mem_obj,
	struct timespec *mtime,
	struct timespec *cs_mtime)
{
	vnode_pager_t   vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
	           mtime,
	           cs_mtime);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	int                     optype)
{
	vnode_pager_t   vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap(vnode_object->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 * vnode_pager_data_request: handle a page-in request by asking the
 * cluster layer how much to read around the faulting offset, falling
 * back to a single page if it has no opinion.
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	__unused memory_object_cluster_size_t   length,
	__unused vm_prot_t      desired_access,
	memory_object_fault_info_t      fault_info)
{
	vnode_pager_t           vnode_object;
	memory_object_offset_t  base_offset;
	vm_size_t               size;
	uint32_t                io_streaming = 0;

	assertf(page_aligned(offset), "offset 0x%llx\n", offset);

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER_BYTES;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->vn_pgr_hdr.mo_control,
	    &base_offset, &size, &io_streaming,
	    fault_info) != KERN_SUCCESS) {
		size = PAGE_SIZE;
	}

	assert(offset >= base_offset &&
	    offset < base_offset + size);

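	/*
	 * Example (sketch): a fault at offset 0x3000 might come back from
	 * memory_object_cluster_size() as base_offset 0 and size 0x20000,
	 * in which case the cluster read below brings in the whole 128KB
	 * run and the faulting page lands at upl_offset 0x3000 within it.
	 */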
	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 * vnode_pager_reference: take an extra reference on the pager.
 */
void
vnode_pager_reference(
	memory_object_t         mem_obj)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	os_ref_retain_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
}

/*
 * vnode_pager_deallocate: drop a reference; on the last release,
 * release the vnode and free the pager structure.
 */
void
vnode_pager_deallocate(
	memory_object_t         mem_obj)
{
	vnode_pager_t   vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (os_ref_release_raw(&vnode_object->vn_pgr_hdr_ref, NULL) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
}

/*
 * vnode_pager_terminate: nothing to tear down here beyond what
 * vnode_pager_deallocate() already handles.
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 * vnode_pager_map: note that the vnode is now memory-mapped; fails
 * if ubc_map() refuses the mapping.
 */
kern_return_t
vnode_pager_map(
	memory_object_t mem_obj,
	vm_prot_t       prot)
{
	vnode_pager_t   vnode_object;
	int             ret;
	kern_return_t   kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t mem_obj)
{
	vnode_pager_t   vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}


/*
 * vnode_pager_cluster_write: push a range of dirty data to the vnode,
 * either synchronously for msync() or as a clustered pageout.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t           vnode_object,
	vm_object_offset_t      offset,
	vm_size_t               cnt,
	vm_object_offset_t      *resid_offset,
	int                     *io_error,
	int                     upl_flags)
{
	vm_size_t       size;
	int             errno;

	if (upl_flags & UPL_MSYNC) {
		upl_flags |= UPL_VNODE_PAGER;

		if ((upl_flags & UPL_IOSYNC) && io_error) {
			upl_flags |= UPL_KEEPCACHED;
		}

		while (cnt) {
			size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
			    NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ((upl_flags & UPL_KEEPCACHED)) {
				if ((*io_error = errno)) {
					break;
				}
			}
			cnt -= size;
			offset += size;
		}
		if (resid_offset) {
			*resid_offset = offset;
		}
	} else {
		vm_object_offset_t      vnode_size;
		vm_object_offset_t      base_offset;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = MAX_UPL_TRANSFER_BYTES;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size) {
				size = round_page(((vm_size_t)(vnode_size - base_offset)));
			}
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
		vnode_pageout(vnode_object->vnode_handle,
		    NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
		    (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
	}
}
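
/*
 * Worked example of the clustering arithmetic above, assuming
 * MAX_UPL_TRANSFER_BYTES is 256KB (0x40000): a pageout at file offset
 * 0x41000 gives base_offset = 0x41000 & ~0x3ffff = 0x40000, so the UPL
 * spans [0x40000, 0x80000) and the original page sits at upl_offset
 * 0x1000; if the file ends at 0x50000, size is clipped to
 * round_page(0x10000) = 0x10000.
 */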


/*
 * vnode_pager_cluster_read: page a range of the vnode in; on a pagein
 * failure, abort the corresponding UPL so the pages are released
 * before returning the error to the fault path.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t           vnode_object,
	vm_object_offset_t      base_offset,
	vm_object_offset_t      offset,
	uint32_t                io_streaming,
	vm_size_t               cnt)
{
	int             local_error = 0;
	int             kret;
	int             flags = 0;

	assert(!(cnt & PAGE_MASK));

	if (io_streaming) {
		flags |= UPL_IOSTREAMING;
	}

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
	    (upl_t) NULL,
	    (upl_offset_t) (offset - base_offset),
	    base_offset,
	    (upl_size_t) cnt,
	    flags,
	    &local_error);
	/*
	 * if (kret == PAGER_ABSENT) {
	 * Need to work out the defs here: 1 corresponds to PAGER_ABSENT as
	 * defined in bsd/vm/vm_pager.h.  However, we should not include that
	 * file here; it would be a layering violation.
	 */
	if (kret == 1) {
		int     uplflags;
		upl_t   upl = NULL;
		unsigned int    count = 0;
		kern_return_t   kr;

		uplflags = (UPL_NO_SYNC |
		    UPL_CLEAN_IN_PLACE |
		    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->vn_pgr_hdr.mo_control,
		    base_offset, (upl_size_t) cnt,
		    &upl, NULL, &count, uplflags, VM_KERN_MEMORY_NONE);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example). Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_VNODEPAGER_CLREAD_NO_UPL), 0 /* arg */);
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}

/*
 * vnode_object_create: allocate and initialize a vnode pager for the
 * given vnode.
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	vnode_pager_t   vnode_object;

	vnode_object = zalloc_flags(vnode_pager_zone, Z_WAITOK | Z_NOFAIL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter. We need to make sure that
	 * vm_map does not see this object as a named entry port. So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->vn_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
	vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

	os_ref_init_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
	vnode_object->vnode_handle = vp;

	return vnode_object;
}

/*
 * vnode_pager_lookup: convert a memory object back to its vnode pager;
 * asserts that it really is one of ours.
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t  name)
{
	vnode_pager_t   vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops);
	return vnode_object;
}


struct vnode *
vnode_pager_lookup_vnode(
	memory_object_t  name)
{
	vnode_pager_t   vnode_object;
	vnode_object = (vnode_pager_t)name;
	if (vnode_object->vn_pgr_hdr.mo_pager_ops == &vnode_pager_ops) {
		return vnode_object->vnode_handle;
	} else {
		return NULL;
	}
}

/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr(vm_map_entry_t entry, uintptr_t *vnodeaddr, uint32_t *vid);

int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
	vm_map_t        map;
	vm_map_offset_t address = (vm_map_offset_t)arg;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;
	vm_map_offset_t start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;
	boolean_t       do_region_footprint;
	int             effective_page_shift, effective_page_size;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}

	effective_page_shift = vm_self_region_page_shift(map);
	effective_page_size = (1 << effective_page_shift);

	vm_map_reference(map);
	task_unlock(task);

	do_region_footprint = task_self_region_footprint();

	vm_map_lock_read(map);

	start = address;

	if (!vm_map_lookup_entry_allow_pgz(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			if (do_region_footprint &&
			    address == tmp_entry->vme_end) {
				ledger_amount_t ledger_resident;
				ledger_amount_t ledger_compressed;

				/*
				 * This request is right after the last valid
				 * memory region; instead of reporting the
				 * end of the address space, report a fake
				 * memory region to account for non-volatile
				 * purgeable and/or ledger-tagged memory
				 * owned by this task.
				 */
				task_ledgers_footprint(task->ledger,
				    &ledger_resident,
				    &ledger_compressed);
				if (ledger_resident + ledger_compressed == 0) {
					/* nothing to report */
					vm_map_unlock_read(map);
					vm_map_deallocate(map);
					return 0;
				}

				/* provide fake region for purgeable */
				pinfo->pri_offset = address;
				pinfo->pri_protection = VM_PROT_DEFAULT;
				pinfo->pri_max_protection = VM_PROT_DEFAULT;
				pinfo->pri_inheritance = VM_INHERIT_NONE;
				pinfo->pri_behavior = VM_BEHAVIOR_DEFAULT;
				pinfo->pri_user_wired_count = 0;
				pinfo->pri_user_tag = -1;
				pinfo->pri_pages_resident =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out =
				    (uint32_t) (ledger_compressed / effective_page_size);
				pinfo->pri_pages_dirtied =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_ref_count = 1;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = SM_PRIVATE;
				pinfo->pri_private_pages_resident =
				    (uint32_t) (ledger_resident / effective_page_size);
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
				pinfo->pri_address = address;
				pinfo->pri_size =
				    (uint64_t) (ledger_resident + ledger_compressed);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return 1;
			}
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 0;
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = VME_OFFSET(entry);
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = VME_ALIAS(entry);

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared) {
			pinfo->pri_flags |= PROC_REGION_SHARED;
		}
	}


	extended.protection = entry->protection;
	extended.user_tag = VME_ALIAS(entry);
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended, TRUE, VM_REGION_EXTENDED_INFO_COUNT);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) {
		extended.share_mode = SM_PRIVATE;
	}

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 1;
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return 1;
}

int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{
	vm_map_t        map;
	vm_map_offset_t address = (vm_map_offset_t)arg;
	vm_map_entry_t  tmp_entry;
	vm_map_entry_t  entry;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry_allow_pgz(map, address, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return 0;
		}
	} else {
		entry = tmp_entry;
	}

	while (entry != vm_map_to_entry(map)) {
		*vnodeaddr = 0;
		*vid = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
				pinfo->pri_offset = VME_OFFSET(entry);
				pinfo->pri_protection = entry->protection;
				pinfo->pri_max_protection = entry->max_protection;
				pinfo->pri_inheritance = entry->inheritance;
				pinfo->pri_behavior = entry->behavior;
				pinfo->pri_user_wired_count = entry->user_wired_count;
				pinfo->pri_user_tag = VME_ALIAS(entry);

				if (entry->is_shared) {
					pinfo->pri_flags |= PROC_REGION_SHARED;
				}

				pinfo->pri_pages_resident = 0;
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out = 0;
				pinfo->pri_pages_dirtied = 0;
				pinfo->pri_ref_count = 0;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = 0;

				pinfo->pri_private_pages_resident = 0;
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = 0;

				pinfo->pri_address = (uint64_t)entry->vme_start;
				pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return 1;
			}
		}

		/* Keep searching for a vnode-backed mapping */
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return 0;
}

int
find_region_details(task_t task, vm_map_offset_t offset,
    uintptr_t *vnodeaddr, uint32_t *vid,
    uint64_t *start, uint64_t *len)
{
	vm_map_t        map;
	vm_map_entry_t  tmp_entry, entry;
	int             rc = 0;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL) {
		task_unlock(task);
		return 0;
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry_allow_pgz(map, offset, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			rc = 0;
			goto ret;
		}
	} else {
		entry = tmp_entry;
	}

	while (entry != vm_map_to_entry(map)) {
		*vnodeaddr = 0;
		*vid = 0;
		*start = 0;
		*len = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
				*start = entry->vme_start;
				*len = entry->vme_end - entry->vme_start;
				rc = 1;
				goto ret;
			}
		}

		entry = entry->vme_next;
	}

ret:
	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return rc;
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t  entry,
	uintptr_t       *vnodeaddr,
	uint32_t        *vid)
{
	vm_object_t     top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t   kr;
	int             shadow_depth;


	if (entry->is_sub_map) {
		return 0;
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = VME_OBJECT(entry);
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
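			/*
			 * Walk the shadow chain hand-over-hand: take the
			 * shadow's lock before dropping the current object's,
			 * so the chain can't be torn down underneath us; we
			 * end up holding only the terminal object's lock.
			 */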
			for (object = top_object, shadow_depth = 0;
			    object->shadow != VM_OBJECT_NULL;
			    object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return 0;
	} else if (object->internal) {
		vm_object_unlock(object);
		return 0;
	} else if (!object->pager_ready ||
	    object->terminating ||
	    !object->alive ||
	    object->pager == NULL) {
		vm_object_unlock(object);
		return 0;
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return 0;
			}
		} else {
			vm_object_unlock(object);
			return 0;
		}
	}
	vm_object_unlock(object);
	return 1;
}

kern_return_t
vnode_pager_get_object_vnode(
	memory_object_t mem_obj,
	uintptr_t       *vnodeaddr,
	uint32_t        *vid)
{
	vnode_pager_t   vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return KERN_SUCCESS;
	}

	return KERN_FAILURE;
}

#if CONFIG_IOSCHED
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t mem_obj,
	uintptr_t       *devvp)
{
	struct vnode    *vp;
	uint32_t        vid;

	if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS) {
		return KERN_FAILURE;
	}
	*devvp = (uintptr_t)vnode_mountdevvp(vp);
	if (*devvp) {
		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry.  If found,
 * return with the object locked; otherwise return VM_OBJECT_NULL with
 * nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t  entry)
{
	vm_object_t                     top_object, object;
	memory_object_t                 memory_object;
	memory_object_pager_ops_t       pager_ops;

	if (!entry->is_sub_map) {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = VME_OBJECT(entry);

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object &&
			    !object->internal &&
			    object->pager_ready &&
			    !object->terminating &&
			    object->alive &&
			    object->pager != NULL) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return VM_OBJECT_NULL.
				 */

				if (pager_ops == &vnode_pager_ops) {
					return object;  /* we return with the object locked */
				}
			}

			vm_object_unlock(object);
		}
	}

	return VM_OBJECT_NULL;
}