1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/memory_object.c
60 * Author: Michael Wayne Young
61 *
62 * External memory management interface control functions.
63 */
64
65 /*
66 * Interface dependencies:
67 */
68
69 #include <mach/std_types.h> /* For pointer_t */
70 #include <mach/mach_types.h>
71
72 #include <mach/mig.h>
73 #include <mach/kern_return.h>
74 #include <mach/memory_object.h>
75 #include <mach/memory_object_control.h>
76 #include <mach/host_priv_server.h>
77 #include <mach/boolean.h>
78 #include <mach/vm_prot.h>
79 #include <mach/message.h>
80
81 /*
82 * Implementation dependencies:
83 */
84 #include <string.h> /* For memcpy() */
85
86 #include <kern/host.h>
87 #include <kern/thread.h> /* For current_thread() */
88 #include <kern/ipc_mig.h>
89 #include <kern/misc_protos.h>
90
91 #include <vm/vm_object.h>
92 #include <vm/vm_fault.h>
93 #include <vm/memory_object.h>
94 #include <vm/vm_page.h>
95 #include <vm/vm_pageout.h>
96 #include <vm/pmap.h> /* For pmap_clear_modify */
97 #include <vm/vm_kern.h> /* For kernel_map, vm_move */
98 #include <vm/vm_map.h> /* For vm_map_pageable */
99 #include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
100 #include <vm/vm_shared_region.h>
101
102 #include <vm/vm_external.h>
103
104 #include <vm/vm_protos.h>
105
/*
 * Default memory manager port (MEMORY_OBJECT_DEFAULT_NULL until one is
 * registered).  NOTE(review): presumably guarded by
 * memory_manager_default_lock below — confirm against its users.
 */
memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
LCK_MTX_DECLARE(memory_manager_default_lock, &vm_object_lck_grp);
108
109
110 /*
111 * Routine: memory_object_should_return_page
112 *
113 * Description:
114 * Determine whether the given page should be returned,
115 * based on the page's state and on the given return policy.
116 *
117 * We should return the page if one of the following is true:
118 *
119 * 1. Page is dirty and should_return is not RETURN_NONE.
120 * 2. Page is precious and should_return is RETURN_ALL.
121 * 3. Should_return is RETURN_ANYTHING.
122 *
123 * As a side effect, m->vmp_dirty will be made consistent
124 * with pmap_is_modified(m), if should_return is not
125 * MEMORY_OBJECT_RETURN_NONE.
126 */
127
/*
 * NOTE: this macro has a side effect — when should_return is not
 * RETURN_NONE, it may set (m)->vmp_dirty from pmap_is_modified(),
 * as described in the block comment above.
 */
#define memory_object_should_return_page(m, should_return) \
	(should_return != MEMORY_OBJECT_RETURN_NONE && \
	 (((m)->vmp_dirty || ((m)->vmp_dirty = pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))) || \
	  ((m)->vmp_precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
	  (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
133
/*
 * Result codes from memory_object_lock_page(), telling the caller what
 * must happen to the page next.
 */
typedef int     memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE          0  /* nothing further to do */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1  /* page busy/cleaning: wait, retry */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   2  /* page must be pushed to the pager */
#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE     3  /* page should be freed */

memory_object_lock_result_t memory_object_lock_page(
	vm_page_t               m,
	memory_object_return_t  should_return,
	boolean_t               should_flush,
	vm_prot_t               prot);
146
147 /*
148 * Routine: memory_object_lock_page
149 *
150 * Description:
151 * Perform the appropriate lock operations on the
152 * given page. See the description of
153 * "memory_object_lock_request" for the meanings
154 * of the arguments.
155 *
156 * Returns an indication that the operation
157 * completed, blocked, or that the page must
158 * be cleaned.
159 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t               m,
	memory_object_return_t  should_return,
	boolean_t               should_flush,
	vm_prot_t               prot)
{
	/* translate the legacy "no change" protection encoding */
	if (prot == VM_PROT_NO_CHANGE_LEGACY) {
		prot = VM_PROT_NO_CHANGE;
	}

	/*
	 * A busy or in-cleaning page can't be operated on right now;
	 * the caller must wait for it to settle and retry.
	 */
	if (m->vmp_busy || m->vmp_cleaning) {
		return MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK;
	}

	/* reclaim the page from the laundry so we can act on it here */
	if (m->vmp_laundry) {
		vm_pageout_steal_laundry(m, FALSE);
	}

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */
	if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart) {
		if (VMP_ERROR_GET(m) && should_flush && !VM_PAGE_WIRED(m)) {
			/*
			 * dump the page, pager wants us to
			 * clean it up and there is no
			 * relevant data to return
			 */
			return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
		}
		return MEMORY_OBJECT_LOCK_RESULT_DONE;
	}
	assert(!m->vmp_fictitious);

	if (VM_PAGE_WIRED(m)) {
		/*
		 * The page is wired... just clean or return the page if needed.
		 * Wired pages don't get flushed or disconnected from the pmap.
		 */
		if (memory_object_should_return_page(m, should_return)) {
			return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
		}

		return MEMORY_OBJECT_LOCK_RESULT_DONE;
	}

	if (should_flush) {
		/*
		 * must do the pmap_disconnect before determining the
		 * need to return the page... otherwise it's possible
		 * for the page to go from the clean to the dirty state
		 * after we've made our decision
		 */
		if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
			SET_PAGE_DIRTY(m, FALSE);
		}
	} else {
		/*
		 * If we are decreasing permission, do it now;
		 * let the fault handler take care of increases
		 * (pmap_page_protect may not increase protection).
		 */
		if (prot != VM_PROT_NO_CHANGE) {
			pmap_page_protect(VM_PAGE_GET_PHYS_PAGE(m), VM_PROT_ALL & ~prot);
		}
	}
	/*
	 *	Handle returning dirty or precious pages
	 */
	if (memory_object_should_return_page(m, should_return)) {
		/*
		 * we use to do a pmap_disconnect here in support
		 * of memory_object_lock_request, but that routine
		 * no longer requires this...  in any event, in
		 * our world, it would turn into a big noop since
		 * we don't lock the page in any way and as soon
		 * as we drop the object lock, the page can be
		 * faulted back into an address space
		 *
		 *	if (!should_flush)
		 *		pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
		 */
		return MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN;
	}

	/*
	 *	Handle flushing clean pages
	 */
	if (should_flush) {
		return MEMORY_OBJECT_LOCK_RESULT_MUST_FREE;
	}

	/*
	 * we use to deactivate clean pages at this point,
	 * but we do not believe that an msync should change
	 * the 'age' of a page in the cache... here is the
	 * original comment and code concerning this...
	 *
	 * XXX Make clean but not flush a paging hint,
	 *     and deactivate the pages.  This is a hack
	 *     because it overloads flush/clean with
	 *     implementation-dependent meaning.  This only
	 *     happens to pages that are already clean.
	 *
	 *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
	 *	return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
	 */

	return MEMORY_OBJECT_LOCK_RESULT_DONE;
}
272
273
274
275 /*
276 * Routine: memory_object_lock_request [user interface]
277 *
278 * Description:
279 * Control use of the data associated with the given
280 * memory object. For each page in the given range,
281 * perform the following operations, in order:
282 * 1) restrict access to the page (disallow
283 * forms specified by "prot");
284 * 2) return data to the manager (if "should_return"
285 * is RETURN_DIRTY and the page is dirty, or
286 * "should_return" is RETURN_ALL and the page
287 * is either dirty or precious); and,
288 * 3) flush the cached copy (if "should_flush"
289 * is asserted).
290 * The set of pages is defined by a starting offset
291 * ("offset") and size ("size"). Only pages with the
292 * same page alignment as the starting offset are
293 * considered.
294 *
295 * A single acknowledgement is sent (to the "reply_to"
296 * port) when these actions are complete. If successful,
297 * the naked send right for reply_to is consumed.
298 */
299
kern_return_t
memory_object_lock_request(
	memory_object_control_t         control,
	memory_object_offset_t          offset,
	memory_object_size_t            size,
	memory_object_offset_t          *resid_offset,
	int                             *io_errno,
	memory_object_return_t          should_return,
	int                             flags,
	vm_prot_t                       prot)
{
	vm_object_t     object;

	/* translate the legacy "no change" protection encoding */
	if (prot == VM_PROT_NO_CHANGE_LEGACY) {
		prot = VM_PROT_NO_CHANGE;
	}

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	/* prot must be a subset of the known protection bits, or "no change" */
	if ((prot & ~(VM_PROT_ALL | VM_PROT_ALLEXEC)) != 0 && prot != VM_PROT_NO_CHANGE) {
		return KERN_INVALID_ARGUMENT;
	}

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		/*
		 * FLUSH_ALL is only honored for a whole-object flush with no
		 * data return and no copy object; otherwise demote it to an
		 * ordinary ranged DATA_FLUSH.
		 */
		if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) {
			flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL;
			flags |= MEMORY_OBJECT_DATA_FLUSH;
		}
	}
	/* convert from pager-relative to object-internal offsets */
	offset -= object->paging_offset;

	if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) {
		vm_object_reap_pages(object, REAP_DATA_FLUSH);
	} else {
		(void)vm_object_update(object, offset, size, resid_offset,
		    io_errno, should_return, flags, prot);
	}

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
358
359 /*
360 * Routine: memory_object_destroy [user interface]
361 * Purpose:
362 * Shut down a memory object, despite the
363 * presence of address map (or other) references
364 * to the vm_object.
365 */
366 kern_return_t
memory_object_destroy(memory_object_control_t control,kern_return_t reason)367 memory_object_destroy(
368 memory_object_control_t control,
369 kern_return_t reason)
370 {
371 vm_object_t object;
372
373 object = memory_object_control_to_vm_object(control);
374 if (object == VM_OBJECT_NULL) {
375 return KERN_INVALID_ARGUMENT;
376 }
377
378 return vm_object_destroy(object, reason);
379 }
380
381 /*
382 * Routine: vm_object_sync
383 *
384 * Kernel internal function to synch out pages in a given
385 * range within an object to its memory manager. Much the
386 * same as memory_object_lock_request but page protection
387 * is not changed.
388 *
389 * If the should_flush and should_return flags are true pages
390 * are flushed, that is dirty & precious pages are written to
391 * the memory manager and then discarded. If should_return
392 * is false, only precious pages are returned to the memory
393 * manager.
394 *
395 * If should flush is false and should_return true, the memory
396 * manager's copy of the pages is updated. If should_return
397 * is also false, only the precious pages are updated. This
398 * last option is of limited utility.
399 *
400 * Returns:
401 * FALSE if no pages were returned to the pager
402 * TRUE otherwise.
403 */
404
boolean_t
vm_object_sync(
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	boolean_t               should_flush,
	boolean_t               should_return,
	boolean_t               should_iosync)
{
	boolean_t       rv;
	int             flags;

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush) {
		flags = MEMORY_OBJECT_DATA_FLUSH;
		/*
		 * This flush is from an msync(), not a truncate(), so the
		 * contents of the file are not affected.
		 * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know
		 * that the data is not changed and that there's no need to
		 * push the old contents to a copy object.
		 */
		flags |= MEMORY_OBJECT_DATA_NO_CHANGE;
	} else {
		flags = 0;
	}

	if (should_iosync) {
		flags |= MEMORY_OBJECT_IO_SYNC;
	}

	/* map the boolean onto a return policy for vm_object_update() */
	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
	    (should_return) ?
	    MEMORY_OBJECT_RETURN_ALL :
	    MEMORY_OBJECT_RETURN_NONE,
	    flags,
	    VM_PROT_NO_CHANGE);


	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
455
456
457
/*
 * LIST_REQ_PAGEOUT_PAGES:
 *	Hand "data_cnt" bytes starting at offset "po" back to the object's
 *	pager via memory_object_data_return().  The object lock is dropped
 *	around the pager call (a paging reference is taken instead), so any
 *	page state observed before invoking this macro may be stale after
 *	it returns.
 */
#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
MACRO_BEGIN                                                             \
                                                                        \
	int			upl_flags;                              \
	memory_object_t		pager;                                  \
                                                                        \
	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {          \
		vm_object_paging_begin(object);                         \
		vm_object_unlock(object);                               \
                                                                        \
		if (iosync)                                             \
			upl_flags = UPL_MSYNC | UPL_IOSYNC;             \
		else                                                    \
			upl_flags = UPL_MSYNC;                          \
                                                                        \
		(void) memory_object_data_return(pager,                 \
			po,                                             \
			(memory_object_cluster_size_t)data_cnt,         \
			ro,                                             \
			ioerr,                                          \
			FALSE,                                          \
			FALSE,                                          \
			upl_flags);                                     \
                                                                        \
		vm_object_lock(object);                                 \
		vm_object_paging_end(object);                           \
	}                                                               \
MACRO_END
486
/* Resolves a vnode pager to its vnode; used below for write accounting. */
extern struct vnode *
vnode_pager_lookup_vnode(memory_object_t);
489
/*
 * Routine:	vm_object_update_extent
 * Description:
 *	Apply memory_object_lock_page() to every resident page in
 *	[offset, offset_end), batching consecutive dirty/precious pages
 *	into contiguous runs that are pushed to the pager with
 *	LIST_REQ_PAGEOUT_PAGES (which drops and retakes the object lock).
 *	Page state changes (e.g. frees) are queued through the
 *	delayed-work mechanism and applied in batches.
 *	Returns 1 if any data was returned to the pager, 0 otherwise.
 */
static int
vm_object_update_extent(
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_offset_t      offset_end,
	vm_object_offset_t      *offset_resid,
	int                     *io_errno,
	boolean_t               should_flush,
	memory_object_return_t  should_return,
	boolean_t               should_iosync,
	vm_prot_t               prot)
{
	vm_page_t       m;
	int             retval = 0;
	vm_object_offset_t      paging_offset = 0;
	vm_object_offset_t      next_offset = offset;
	memory_object_lock_result_t     page_lock_result;
	memory_object_cluster_size_t    data_cnt = 0;
	struct vm_page_delayed_work     dw_array;
	struct vm_page_delayed_work     *dwp, *dwp_start;
	bool            dwp_finish_ctx = TRUE;
	int             dw_count;
	int             dw_limit;
	int             dirty_count;

	/*
	 * Set up the delayed-work context; if none can be allocated,
	 * fall back to a single on-stack entry (dw_limit = 1).
	 */
	dwp_start = dwp = NULL;
	dw_count = 0;
	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
	dwp_start = vm_page_delayed_work_get_ctx();
	if (dwp_start == NULL) {
		dwp_start = &dw_array;
		dw_limit = 1;
		dwp_finish_ctx = FALSE;
	}
	dwp = dwp_start;

	/* count of dirty pages freed; reported to the task ledger below */
	dirty_count = 0;

	for (;
	    offset < offset_end && object->resident_page_count;
	    offset += PAGE_SIZE_64) {
		/*
		 * Limit the number of pages to be cleaned at once to a contiguous
		 * run, or at most MAX_UPL_TRANSFER_BYTES
		 */
		if (data_cnt) {
			if ((data_cnt >= MAX_UPL_TRANSFER_BYTES) || (next_offset != offset)) {
				/* apply pending delayed work before the lock is dropped */
				if (dw_count) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
					dwp = dwp_start;
					dw_count = 0;
				}
				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
				    paging_offset, offset_resid, io_errno, should_iosync);
				data_cnt = 0;
			}
		}
		/*
		 * "while" rather than "if" so that "continue" can re-look-up
		 * the page at this offset after the object lock has been
		 * dropped; otherwise each iteration ends in the final "break".
		 */
		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			dwp->dw_mask = 0;

			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
				/*
				 *	End of a run of dirty/precious pages.
				 */
				if (dw_count) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
					dwp = dwp_start;
					dw_count = 0;
				}
				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
				    paging_offset, offset_resid, io_errno, should_iosync);
				/*
				 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
				 * allow the state of page 'm' to change... we need to re-lookup
				 * the current offset
				 */
				data_cnt = 0;
				continue;
			}

			switch (page_lock_result) {
			case MEMORY_OBJECT_LOCK_RESULT_DONE:
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
				if (m->vmp_dirty == TRUE) {
					dirty_count++;
				}
				dwp->dw_mask |= DW_vm_page_free;
				break;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
				/* page busy or cleaning: sleep, then retry this offset */
				PAGE_SLEEP(object, m, THREAD_UNINT);
				continue;

			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
				/* start a new run, or extend the current one */
				if (data_cnt == 0) {
					paging_offset = offset;
				}

				data_cnt += PAGE_SIZE;
				next_offset = offset + PAGE_SIZE_64;

				/*
				 * wired pages shouldn't be flushed and
				 * since they aren't on any queue,
				 * no need to remove them
				 */
				if (!VM_PAGE_WIRED(m)) {
					if (should_flush) {
						/*
						 * add additional state for the flush
						 */
						m->vmp_free_when_done = TRUE;
					}
					/*
					 * we use to remove the page from the queues at this
					 * point, but we do not believe that an msync
					 * should cause the 'age' of a page to be changed
					 *
					 *    else
					 *	dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
					 */
				}
				retval = 1;
				break;
			}
			if (dwp->dw_mask) {
				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

				if (dw_count >= dw_limit) {
					vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
					dwp = dwp_start;
					dw_count = 0;
				}
			}
			break;
		}
	}

	/* charge invalidated dirty pages against the task's logical writes */
	if (object->pager) {
		task_update_logical_writes(current_task(), (dirty_count * PAGE_SIZE), TASK_WRITE_INVALIDATED, vnode_pager_lookup_vnode(object->pager));
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (dw_count) {
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
	}

	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
		    paging_offset, offset_resid, io_errno, should_iosync);
	}

	if (dwp_start && dwp_finish_ctx) {
		vm_page_delayed_work_finish_ctx(dwp_start);
		dwp_start = dwp = NULL;
	}

	return retval;
}
655
656
657
658 /*
659 * Routine: vm_object_update
660 * Description:
661 * Work function for m_o_lock_request(), vm_o_sync().
662 *
663 * Called with object locked and paging ref taken.
664 */
kern_return_t
vm_object_update(
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	vm_object_offset_t      *resid_offset,
	int                     *io_errno,
	memory_object_return_t  should_return,
	int                     flags,
	vm_prot_t               protection)
{
	vm_object_t             copy_object = VM_OBJECT_NULL;
	boolean_t               data_returned = FALSE;
	boolean_t               update_cow;
	boolean_t               should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t               should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	vm_fault_return_t       result;
	int                     num_of_extents;
	int                     n;
#define MAX_EXTENTS     8
#define EXTENT_SIZE     (1024 * 1024 * 256)
#define RESIDENT_LIMIT  (1024 * 32)
	struct extent {
		vm_object_offset_t e_base;      /* extent's EXTENT_SIZE-aligned base */
		vm_object_offset_t e_min;       /* lowest page offset seen in extent */
		vm_object_offset_t e_max;       /* highest byte offset seen in extent */
	} extents[MAX_EXTENTS];

	/*
	 * To avoid blocking while scanning for pages, save
	 * dirty pages to be cleaned all at once.
	 *
	 * XXXO A similar strategy could be used to limit the
	 * number of times that a scan must be restarted for
	 * other reasons.  Those pages that would require blocking
	 * could be temporarily collected in another list, or
	 * their offsets could be recorded in a small array.
	 */

	/*
	 * XXX NOTE: May want to consider converting this to a page list
	 * XXX vm_map_copy interface.  Need to understand object
	 * XXX coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
	    && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
	    !(flags & MEMORY_OBJECT_DATA_PURGE)))
	    || (flags & MEMORY_OBJECT_COPY_SYNC);

	if (update_cow || (flags & (MEMORY_OBJECT_DATA_PURGE | MEMORY_OBJECT_DATA_SYNC))) {
		int collisions = 0;

		while ((copy_object = object->copy) != VM_OBJECT_NULL) {
			/*
			 * need to do a try here since we're swimming upstream
			 * against the normal lock ordering... however, we need
			 * to hold the object stable until we gain control of the
			 * copy object so we have to be careful how we approach this
			 */
			if (vm_object_lock_try(copy_object)) {
				/*
				 * we 'won' the lock on the copy object...
				 * no need to hold the object lock any longer...
				 * take a real reference on the copy object because
				 * we're going to call vm_fault_page on it which may
				 * under certain conditions drop the lock and the paging
				 * reference we're about to take... the reference
				 * will keep the copy object from going away if that happens
				 */
				vm_object_unlock(object);
				vm_object_reference_locked(copy_object);
				break;
			}
			vm_object_unlock(object);

			/* lost the try-lock race: back off briefly and retry */
			collisions++;
			mutex_pause(collisions);

			vm_object_lock(object);
		}
	}
	if ((copy_object != VM_OBJECT_NULL && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_object_offset_t      i;
		vm_object_size_t        copy_size;
		vm_object_offset_t      copy_offset;
		vm_prot_t               prot;
		vm_page_t               page;
		vm_page_t               top_page;
		kern_return_t           error = 0;
		struct vm_object_fault_info fault_info = {};

		if (copy_object != VM_OBJECT_NULL) {
			/*
			 * translate offset with respect to shadow's offset
			 */
			copy_offset = (offset >= copy_object->vo_shadow_offset) ?
			    (offset - copy_object->vo_shadow_offset) : 0;

			if (copy_offset > copy_object->vo_size) {
				copy_offset = copy_object->vo_size;
			}

			/*
			 * clip size with respect to shadow offset
			 */
			if (offset >= copy_object->vo_shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->vo_shadow_offset - offset) {
				copy_size = (size - (copy_object->vo_shadow_offset - offset));
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->vo_size) {
				if (copy_object->vo_size >= copy_offset) {
					copy_size = copy_object->vo_size - copy_offset;
				} else {
					copy_size = 0;
				}
			}
			/* NOTE: from here on copy_size is an END offset, not a length */
			copy_size += copy_offset;
		} else {
			copy_object = object;

			copy_size = offset + size;
			copy_offset = offset;
		}
		fault_info.interruptible = THREAD_UNINT;
		fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
		fault_info.lo_offset = copy_offset;
		fault_info.hi_offset = copy_size;
		fault_info.stealth = TRUE;
		assert(fault_info.cs_bypass == FALSE);
		assert(fault_info.csm_associated == FALSE);

		vm_object_paging_begin(copy_object);

		/*
		 * Write-fault each page of the affected range into the copy
		 * object so it no longer depends on the data we're about to
		 * flush/purge from this object (the COW "push").
		 */
		for (i = copy_offset; i < copy_size; i += PAGE_SIZE) {
RETRY_COW_OF_LOCK_REQUEST:
			fault_info.cluster_size = (vm_size_t) (copy_size - i);
			assert(fault_info.cluster_size == copy_size - i);

			prot =  VM_PROT_WRITE | VM_PROT_READ;
			page = VM_PAGE_NULL;
			result = vm_fault_page(copy_object, i,
			    VM_PROT_WRITE | VM_PROT_READ,
			    FALSE,
			    FALSE, /* page not looked up */
			    &prot,
			    &page,
			    &top_page,
			    (int *)0,
			    &error,
			    FALSE,
			    &fault_info);

			switch (result) {
			case VM_FAULT_SUCCESS:
				if (top_page) {
					vm_fault_cleanup(
						VM_PAGE_OBJECT(page), top_page);
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				}
				if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
					vm_page_lockspin_queues();

					/* re-check now that the page queues are locked */
					if ((!VM_PAGE_NON_SPECULATIVE_PAGEABLE(page))) {
						vm_page_deactivate(page);
					}
					vm_page_unlock_queues();
				}
				PAGE_WAKEUP_DONE(page);
				break;
			case VM_FAULT_RETRY:
				prot =  VM_PROT_WRITE | VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot =  VM_PROT_WRITE | VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot =  VM_PROT_WRITE | VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_SUCCESS_NO_VM_PAGE:
				/* success but no VM page: fail */
				vm_object_paging_end(copy_object);
				vm_object_unlock(copy_object);
				OS_FALLTHROUGH;
			case VM_FAULT_MEMORY_ERROR:
				if (object != copy_object) {
					vm_object_deallocate(copy_object);
				}
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			default:
				panic("vm_object_update: unexpected error 0x%x"
				    " from vm_fault_page()\n", result);
			}
		}
		vm_object_paging_end(copy_object);
	}
	if ((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		if (copy_object != VM_OBJECT_NULL && copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_deallocate(copy_object);
			vm_object_lock(object);
		}
		return KERN_SUCCESS;
	}
	if (copy_object != VM_OBJECT_NULL && copy_object != object) {
		if ((flags & MEMORY_OBJECT_DATA_PURGE)) {
			/* sever the copy object from its shadow chain */
			vm_object_lock_assert_exclusive(copy_object);
			copy_object->shadow_severed = TRUE;
			copy_object->shadowed = FALSE;
			copy_object->shadow = NULL;
			/*
			 * delete the ref the COW was holding on the target object
			 */
			vm_object_deallocate(object);
		}
		vm_object_unlock(copy_object);
		vm_object_deallocate(copy_object);
		vm_object_lock(object);
	}
BYPASS_COW_COPYIN:

	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terrabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count / (8 * MAX_EXTENTS)))) {
		vm_page_t               next;
		vm_object_offset_t      start;
		vm_object_offset_t      end;
		vm_object_size_t        e_mask;
		vm_page_t               m;

		start = offset;
		end = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) vm_page_queue_first(&object->memq);

		while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) m)) {
			next = (vm_page_t) vm_page_queue_next(&m->vmp_listq);

			if ((m->vmp_offset >= start) && (m->vmp_offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->vmp_offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->vmp_offset < extents[n].e_min) {
							extents[n].e_min = m->vmp_offset;
						} else if ((m->vmp_offset + (PAGE_SIZE - 1)) > extents[n].e_max) {
							extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);
						}
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->vmp_offset & e_mask;
						extents[n].e_min = m->vmp_offset;
						extents[n].e_max = m->vmp_offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min) {
								extents[0].e_min = extents[n].e_min;
							}
							if (extents[n].e_max > extents[0].e_max) {
								extents[0].e_max = extents[n].e_max;
							}
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;

						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		/* pre-flight not worthwhile: visit the whole range as one extent */
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
		    should_flush, should_return, should_iosync, protection)) {
			data_returned = TRUE;
		}
	}
	return data_returned;
}
1028
1029
1030 static kern_return_t
vm_object_set_attributes_common(vm_object_t object,boolean_t may_cache,memory_object_copy_strategy_t copy_strategy)1031 vm_object_set_attributes_common(
1032 vm_object_t object,
1033 boolean_t may_cache,
1034 memory_object_copy_strategy_t copy_strategy)
1035 {
1036 boolean_t object_became_ready;
1037
1038 if (object == VM_OBJECT_NULL) {
1039 return KERN_INVALID_ARGUMENT;
1040 }
1041
1042 /*
1043 * Verify the attributes of importance
1044 */
1045
1046 switch (copy_strategy) {
1047 case MEMORY_OBJECT_COPY_NONE:
1048 case MEMORY_OBJECT_COPY_DELAY:
1049 break;
1050 default:
1051 return KERN_INVALID_ARGUMENT;
1052 }
1053
1054 if (may_cache) {
1055 may_cache = TRUE;
1056 }
1057
1058 vm_object_lock(object);
1059
1060 /*
1061 * Copy the attributes
1062 */
1063 assert(!object->internal);
1064 object_became_ready = !object->pager_ready;
1065 object->copy_strategy = copy_strategy;
1066 object->can_persist = may_cache;
1067
1068 /*
1069 * Wake up anyone waiting for the ready attribute
1070 * to become asserted.
1071 */
1072
1073 if (object_became_ready) {
1074 object->pager_ready = TRUE;
1075 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1076 }
1077
1078 vm_object_unlock(object);
1079
1080 return KERN_SUCCESS;
1081 }
1082
1083
1084 /*
1085 * Set the memory object attribute as provided.
1086 *
1087 * XXX This routine cannot be completed until the vm_msync, clean
1088 * in place, and cluster work is completed. See ifdef notyet
1089 * below and note that vm_object_set_attributes_common()
1090 * may have to be expanded.
1091 */
1092 kern_return_t
memory_object_change_attributes(memory_object_control_t control,memory_object_flavor_t flavor,memory_object_info_t attributes,mach_msg_type_number_t count)1093 memory_object_change_attributes(
1094 memory_object_control_t control,
1095 memory_object_flavor_t flavor,
1096 memory_object_info_t attributes,
1097 mach_msg_type_number_t count)
1098 {
1099 vm_object_t object;
1100 kern_return_t result = KERN_SUCCESS;
1101 boolean_t may_cache;
1102 boolean_t invalidate;
1103 memory_object_copy_strategy_t copy_strategy;
1104
1105 object = memory_object_control_to_vm_object(control);
1106 if (object == VM_OBJECT_NULL) {
1107 return KERN_INVALID_ARGUMENT;
1108 }
1109
1110 vm_object_lock(object);
1111
1112 may_cache = object->can_persist;
1113 copy_strategy = object->copy_strategy;
1114 #if notyet
1115 invalidate = object->invalidate;
1116 #endif
1117 vm_object_unlock(object);
1118
1119 switch (flavor) {
1120 case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1121 {
1122 old_memory_object_behave_info_t behave;
1123
1124 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1125 result = KERN_INVALID_ARGUMENT;
1126 break;
1127 }
1128
1129 behave = (old_memory_object_behave_info_t) attributes;
1130
1131 invalidate = behave->invalidate;
1132 copy_strategy = behave->copy_strategy;
1133
1134 break;
1135 }
1136
1137 case MEMORY_OBJECT_BEHAVIOR_INFO:
1138 {
1139 memory_object_behave_info_t behave;
1140
1141 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1142 result = KERN_INVALID_ARGUMENT;
1143 break;
1144 }
1145
1146 behave = (memory_object_behave_info_t) attributes;
1147
1148 invalidate = behave->invalidate;
1149 copy_strategy = behave->copy_strategy;
1150 break;
1151 }
1152
1153 case MEMORY_OBJECT_PERFORMANCE_INFO:
1154 {
1155 memory_object_perf_info_t perf;
1156
1157 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1158 result = KERN_INVALID_ARGUMENT;
1159 break;
1160 }
1161
1162 perf = (memory_object_perf_info_t) attributes;
1163
1164 may_cache = perf->may_cache;
1165
1166 break;
1167 }
1168
1169 case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1170 {
1171 old_memory_object_attr_info_t attr;
1172
1173 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1174 result = KERN_INVALID_ARGUMENT;
1175 break;
1176 }
1177
1178 attr = (old_memory_object_attr_info_t) attributes;
1179
1180 may_cache = attr->may_cache;
1181 copy_strategy = attr->copy_strategy;
1182
1183 break;
1184 }
1185
1186 case MEMORY_OBJECT_ATTRIBUTE_INFO:
1187 {
1188 memory_object_attr_info_t attr;
1189
1190 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1191 result = KERN_INVALID_ARGUMENT;
1192 break;
1193 }
1194
1195 attr = (memory_object_attr_info_t) attributes;
1196
1197 copy_strategy = attr->copy_strategy;
1198 may_cache = attr->may_cache_object;
1199
1200 break;
1201 }
1202
1203 default:
1204 result = KERN_INVALID_ARGUMENT;
1205 break;
1206 }
1207
1208 if (result != KERN_SUCCESS) {
1209 return result;
1210 }
1211
1212 if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1213 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1214 }
1215
1216 /*
1217 * XXX may_cache may become a tri-valued variable to handle
1218 * XXX uncache if not in use.
1219 */
1220 return vm_object_set_attributes_common(object,
1221 may_cache,
1222 copy_strategy);
1223 }
1224
1225 kern_return_t
memory_object_iopl_request(ipc_port_t port,memory_object_offset_t offset,upl_size_t * upl_size,upl_t * upl_ptr,upl_page_info_array_t user_page_list,unsigned int * page_list_count,upl_control_flags_t * flags,vm_tag_t tag)1226 memory_object_iopl_request(
1227 ipc_port_t port,
1228 memory_object_offset_t offset,
1229 upl_size_t *upl_size,
1230 upl_t *upl_ptr,
1231 upl_page_info_array_t user_page_list,
1232 unsigned int *page_list_count,
1233 upl_control_flags_t *flags,
1234 vm_tag_t tag)
1235 {
1236 vm_object_t object;
1237 kern_return_t ret;
1238 upl_control_flags_t caller_flags;
1239 vm_named_entry_t named_entry;
1240
1241 caller_flags = *flags;
1242
1243 if (caller_flags & ~UPL_VALID_FLAGS) {
1244 /*
1245 * For forward compatibility's sake,
1246 * reject any unknown flag.
1247 */
1248 return KERN_INVALID_VALUE;
1249 }
1250
1251 named_entry = mach_memory_entry_from_port(port);
1252 if (named_entry != NULL) {
1253 /* a few checks to make sure user is obeying rules */
1254 if (*upl_size == 0) {
1255 if (offset >= named_entry->size) {
1256 return KERN_INVALID_RIGHT;
1257 }
1258 *upl_size = (upl_size_t)(named_entry->size - offset);
1259 if (*upl_size != named_entry->size - offset) {
1260 return KERN_INVALID_ARGUMENT;
1261 }
1262 }
1263 if (caller_flags & UPL_COPYOUT_FROM) {
1264 if ((named_entry->protection & VM_PROT_READ)
1265 != VM_PROT_READ) {
1266 return KERN_INVALID_RIGHT;
1267 }
1268 } else {
1269 if ((named_entry->protection &
1270 (VM_PROT_READ | VM_PROT_WRITE))
1271 != (VM_PROT_READ | VM_PROT_WRITE)) {
1272 return KERN_INVALID_RIGHT;
1273 }
1274 }
1275 if (named_entry->size < (offset + *upl_size)) {
1276 return KERN_INVALID_ARGUMENT;
1277 }
1278
1279 /* the callers parameter offset is defined to be the */
1280 /* offset from beginning of named entry offset in object */
1281 offset = offset + named_entry->offset;
1282 offset += named_entry->data_offset;
1283
1284 if (named_entry->is_sub_map ||
1285 named_entry->is_copy) {
1286 return KERN_INVALID_ARGUMENT;
1287 }
1288 if (!named_entry->is_object) {
1289 return KERN_INVALID_ARGUMENT;
1290 }
1291
1292 named_entry_lock(named_entry);
1293
1294 object = vm_named_entry_to_vm_object(named_entry);
1295 assert(object != VM_OBJECT_NULL);
1296 vm_object_reference(object);
1297 named_entry_unlock(named_entry);
1298 } else {
1299 return KERN_INVALID_ARGUMENT;
1300 }
1301 if (object == VM_OBJECT_NULL) {
1302 return KERN_INVALID_ARGUMENT;
1303 }
1304
1305 if (!object->private) {
1306 if (object->phys_contiguous) {
1307 *flags = UPL_PHYS_CONTIG;
1308 } else {
1309 *flags = 0;
1310 }
1311 } else {
1312 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1313 }
1314
1315 ret = vm_object_iopl_request(object,
1316 offset,
1317 *upl_size,
1318 upl_ptr,
1319 user_page_list,
1320 page_list_count,
1321 caller_flags,
1322 tag);
1323 vm_object_deallocate(object);
1324 return ret;
1325 }
1326
1327 /*
1328 * Routine: memory_object_upl_request [interface]
1329 * Purpose:
1330 * Cause the population of a portion of a vm_object.
1331 * Depending on the nature of the request, the pages
 * returned may contain valid data or be uninitialized.
1333 *
1334 */
1335
1336 kern_return_t
memory_object_upl_request(memory_object_control_t control,memory_object_offset_t offset,upl_size_t size,upl_t * upl_ptr,upl_page_info_array_t user_page_list,unsigned int * page_list_count,int cntrl_flags,int tag)1337 memory_object_upl_request(
1338 memory_object_control_t control,
1339 memory_object_offset_t offset,
1340 upl_size_t size,
1341 upl_t *upl_ptr,
1342 upl_page_info_array_t user_page_list,
1343 unsigned int *page_list_count,
1344 int cntrl_flags,
1345 int tag)
1346 {
1347 vm_object_t object;
1348 vm_tag_t vmtag = (vm_tag_t)tag;
1349 assert(vmtag == tag);
1350
1351 object = memory_object_control_to_vm_object(control);
1352 if (object == VM_OBJECT_NULL) {
1353 return KERN_TERMINATED;
1354 }
1355
1356 return vm_object_upl_request(object,
1357 offset,
1358 size,
1359 upl_ptr,
1360 user_page_list,
1361 page_list_count,
1362 (upl_control_flags_t)(unsigned int) cntrl_flags,
1363 vmtag);
1364 }
1365
1366
1367 kern_return_t
memory_object_cluster_size(memory_object_control_t control,memory_object_offset_t * start,vm_size_t * length,uint32_t * io_streaming,memory_object_fault_info_t mo_fault_info)1368 memory_object_cluster_size(
1369 memory_object_control_t control,
1370 memory_object_offset_t *start,
1371 vm_size_t *length,
1372 uint32_t *io_streaming,
1373 memory_object_fault_info_t mo_fault_info)
1374 {
1375 vm_object_t object;
1376 vm_object_fault_info_t fault_info;
1377
1378 object = memory_object_control_to_vm_object(control);
1379
1380 if (object == VM_OBJECT_NULL || object->paging_offset > *start) {
1381 return KERN_INVALID_ARGUMENT;
1382 }
1383
1384 *start -= object->paging_offset;
1385
1386 fault_info = (vm_object_fault_info_t)(uintptr_t) mo_fault_info;
1387 vm_object_cluster_size(object,
1388 (vm_object_offset_t *)start,
1389 length,
1390 fault_info,
1391 io_streaming);
1392
1393 *start += object->paging_offset;
1394
1395 return KERN_SUCCESS;
1396 }
1397
1398
1399 /*
1400 * Routine: host_default_memory_manager [interface]
1401 * Purpose:
1402 * set/get the default memory manager port and default cluster
1403 * size.
1404 *
1405 * If successful, consumes the supplied naked send right.
1406 */
1407 kern_return_t
host_default_memory_manager(host_priv_t host_priv,memory_object_default_t * default_manager,__unused memory_object_cluster_size_t cluster_size)1408 host_default_memory_manager(
1409 host_priv_t host_priv,
1410 memory_object_default_t *default_manager,
1411 __unused memory_object_cluster_size_t cluster_size)
1412 {
1413 memory_object_default_t current_manager;
1414 memory_object_default_t new_manager;
1415 memory_object_default_t returned_manager;
1416 kern_return_t result = KERN_SUCCESS;
1417
1418 if (host_priv == HOST_PRIV_NULL) {
1419 return KERN_INVALID_HOST;
1420 }
1421
1422 new_manager = *default_manager;
1423 lck_mtx_lock(&memory_manager_default_lock);
1424 current_manager = memory_manager_default;
1425 returned_manager = MEMORY_OBJECT_DEFAULT_NULL;
1426
1427 if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1428 /*
1429 * Retrieve the current value.
1430 */
1431 returned_manager = ipc_port_make_send_mqueue(current_manager);
1432 } else {
1433 /*
1434 * Only allow the kernel to change the value.
1435 */
1436 extern task_t kernel_task;
1437 if (current_task() != kernel_task) {
1438 result = KERN_NO_ACCESS;
1439 goto out;
1440 }
1441
1442 /*
1443 * If this is the first non-null manager, start
1444 * up the internal pager support.
1445 */
1446 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1447 result = vm_pageout_internal_start();
1448 if (result != KERN_SUCCESS) {
1449 goto out;
1450 }
1451 }
1452
1453 /*
1454 * Retrieve the current value,
1455 * and replace it with the supplied value.
1456 * We return the old reference to the caller
1457 * but we have to take a reference on the new
1458 * one.
1459 */
1460 returned_manager = current_manager;
1461 memory_manager_default = ipc_port_make_send_mqueue(new_manager);
1462
1463 /*
1464 * In case anyone's been waiting for a memory
1465 * manager to be established, wake them up.
1466 */
1467
1468 thread_wakeup((event_t) &memory_manager_default);
1469
1470 /*
1471 * Now that we have a default pager for anonymous memory,
1472 * reactivate all the throttled pages (i.e. dirty pages with
1473 * no pager).
1474 */
1475 if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1476 vm_page_reactivate_all_throttled();
1477 }
1478 }
1479 out:
1480 lck_mtx_unlock(&memory_manager_default_lock);
1481
1482 *default_manager = returned_manager;
1483 return result;
1484 }
1485
1486 /*
1487 * Routine: memory_manager_default_reference
1488 * Purpose:
1489 * Returns a naked send right for the default
1490 * memory manager. The returned right is always
1491 * valid (not IP_NULL or IP_DEAD).
1492 */
1493
1494 __private_extern__ memory_object_default_t
memory_manager_default_reference(void)1495 memory_manager_default_reference(void)
1496 {
1497 memory_object_default_t current_manager;
1498
1499 lck_mtx_lock(&memory_manager_default_lock);
1500 current_manager = memory_manager_default;
1501 while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1502 wait_result_t res;
1503
1504 res = lck_mtx_sleep(&memory_manager_default_lock,
1505 LCK_SLEEP_DEFAULT,
1506 (event_t) &memory_manager_default,
1507 THREAD_UNINT);
1508 assert(res == THREAD_AWAKENED);
1509 current_manager = memory_manager_default;
1510 }
1511 current_manager = ipc_port_make_send_mqueue(current_manager);
1512 lck_mtx_unlock(&memory_manager_default_lock);
1513
1514 return current_manager;
1515 }
1516
1517 /*
1518 * Routine: memory_manager_default_check
1519 *
1520 * Purpose:
1521 * Check whether a default memory manager has been set
1522 * up yet, or not. Returns KERN_SUCCESS if dmm exists,
1523 * and KERN_FAILURE if dmm does not exist.
1524 *
1525 * If there is no default memory manager, log an error,
1526 * but only the first time.
1527 *
1528 */
1529 __private_extern__ kern_return_t
memory_manager_default_check(void)1530 memory_manager_default_check(void)
1531 {
1532 memory_object_default_t current;
1533
1534 lck_mtx_lock(&memory_manager_default_lock);
1535 current = memory_manager_default;
1536 if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1537 static boolean_t logged; /* initialized to 0 */
1538 boolean_t complain = !logged;
1539 logged = TRUE;
1540 lck_mtx_unlock(&memory_manager_default_lock);
1541 if (complain) {
1542 printf("Warning: No default memory manager\n");
1543 }
1544 return KERN_FAILURE;
1545 } else {
1546 lck_mtx_unlock(&memory_manager_default_lock);
1547 return KERN_SUCCESS;
1548 }
1549 }
1550
1551 /* Allow manipulation of individual page state. This is actually part of */
1552 /* the UPL regimen but takes place on the object rather than on a UPL */
1553
1554 kern_return_t
memory_object_page_op(memory_object_control_t control,memory_object_offset_t offset,int ops,ppnum_t * phys_entry,int * flags)1555 memory_object_page_op(
1556 memory_object_control_t control,
1557 memory_object_offset_t offset,
1558 int ops,
1559 ppnum_t *phys_entry,
1560 int *flags)
1561 {
1562 vm_object_t object;
1563
1564 object = memory_object_control_to_vm_object(control);
1565 if (object == VM_OBJECT_NULL) {
1566 return KERN_INVALID_ARGUMENT;
1567 }
1568
1569 return vm_object_page_op(object, offset, ops, phys_entry, flags);
1570 }
1571
1572 /*
1573 * memory_object_range_op offers performance enhancement over
1574 * memory_object_page_op for page_op functions which do not require page
1575 * level state to be returned from the call. Page_op was created to provide
1576 * a low-cost alternative to page manipulation via UPLs when only a single
1577 * page was involved. The range_op call establishes the ability in the _op
1578 * family of functions to work on multiple pages where the lack of page level
1579 * state handling allows the caller to avoid the overhead of the upl structures.
1580 */
1581
1582 kern_return_t
memory_object_range_op(memory_object_control_t control,memory_object_offset_t offset_beg,memory_object_offset_t offset_end,int ops,int * range)1583 memory_object_range_op(
1584 memory_object_control_t control,
1585 memory_object_offset_t offset_beg,
1586 memory_object_offset_t offset_end,
1587 int ops,
1588 int *range)
1589 {
1590 vm_object_t object;
1591
1592 object = memory_object_control_to_vm_object(control);
1593 if (object == VM_OBJECT_NULL) {
1594 return KERN_INVALID_ARGUMENT;
1595 }
1596
1597 return vm_object_range_op(object,
1598 offset_beg,
1599 offset_end,
1600 ops,
1601 (uint32_t *) range);
1602 }
1603
1604
1605 void
memory_object_mark_used(memory_object_control_t control)1606 memory_object_mark_used(
1607 memory_object_control_t control)
1608 {
1609 vm_object_t object;
1610
1611 if (control == NULL) {
1612 return;
1613 }
1614
1615 object = memory_object_control_to_vm_object(control);
1616
1617 if (object != VM_OBJECT_NULL) {
1618 vm_object_cache_remove(object);
1619 }
1620 }
1621
1622
1623 void
memory_object_mark_unused(memory_object_control_t control,__unused boolean_t rage)1624 memory_object_mark_unused(
1625 memory_object_control_t control,
1626 __unused boolean_t rage)
1627 {
1628 vm_object_t object;
1629
1630 if (control == NULL) {
1631 return;
1632 }
1633
1634 object = memory_object_control_to_vm_object(control);
1635
1636 if (object != VM_OBJECT_NULL) {
1637 vm_object_cache_add(object);
1638 }
1639 }
1640
1641 void
memory_object_mark_io_tracking(memory_object_control_t control)1642 memory_object_mark_io_tracking(
1643 memory_object_control_t control)
1644 {
1645 vm_object_t object;
1646
1647 if (control == NULL) {
1648 return;
1649 }
1650 object = memory_object_control_to_vm_object(control);
1651
1652 if (object != VM_OBJECT_NULL) {
1653 vm_object_lock(object);
1654 object->io_tracking = TRUE;
1655 vm_object_unlock(object);
1656 }
1657 }
1658
1659 void
memory_object_mark_trusted(memory_object_control_t control)1660 memory_object_mark_trusted(
1661 memory_object_control_t control)
1662 {
1663 vm_object_t object;
1664
1665 if (control == NULL) {
1666 return;
1667 }
1668 object = memory_object_control_to_vm_object(control);
1669
1670 if (object != VM_OBJECT_NULL) {
1671 vm_object_lock(object);
1672 object->pager_trusted = TRUE;
1673 vm_object_unlock(object);
1674 }
1675 }
1676
#if MACH_ASSERT
memory_object_control_t fbdp_moc = NULL;
vm_object_t fbdp_object = NULL;
/*
 * Stash this control/object pair in globals for debugging
 * (MACH_ASSERT kernels only).
 */
void
memory_object_mark_for_fbdp(
    memory_object_control_t control)
{
    vm_object_t object;

    if (control == NULL) {
        return;
    }
    object = memory_object_control_to_vm_object(control);
    if (object != VM_OBJECT_NULL) {
        fbdp_moc = control;
        fbdp_object = object;
    }
}
#endif /* MACH_ASSERT */
1697
#if CONFIG_SECLUDED_MEMORY
/*
 * Mark (or unmark) the object's pages as eligible for the secluded
 * pool, keeping the global eligible-page count in sync.  Marking
 * additionally requires the "secluded_for_filecache" boot-arg.
 */
void
memory_object_mark_eligible_for_secluded(
    memory_object_control_t control,
    boolean_t               eligible_for_secluded)
{
    vm_object_t object;

    if (control == NULL) {
        return;
    }
    object = memory_object_control_to_vm_object(control);
    if (object == VM_OBJECT_NULL) {
        return;
    }

    vm_object_lock(object);
    if (eligible_for_secluded &&
        secluded_for_filecache && /* global boot-arg */
        !object->eligible_for_secluded) {
        object->eligible_for_secluded = TRUE;
        vm_page_secluded.eligible_for_secluded += object->resident_page_count;
    } else if (!eligible_for_secluded &&
        object->eligible_for_secluded) {
        object->eligible_for_secluded = FALSE;
        vm_page_secluded.eligible_for_secluded -= object->resident_page_count;
        if (object->resident_page_count) {
            /* XXX FBDP TODO: flush pages from secluded queue? */
        }
    }
    vm_object_unlock(object);
}
#endif /* CONFIG_SECLUDED_MEMORY */
1733
1734 void
memory_object_mark_for_realtime(memory_object_control_t control,bool for_realtime)1735 memory_object_mark_for_realtime(
1736 memory_object_control_t control,
1737 bool for_realtime)
1738 {
1739 vm_object_t object;
1740
1741 if (control == NULL) {
1742 return;
1743 }
1744 object = memory_object_control_to_vm_object(control);
1745
1746 if (object == VM_OBJECT_NULL) {
1747 return;
1748 }
1749
1750 vm_object_lock(object);
1751 object->for_realtime = for_realtime;
1752 vm_object_unlock(object);
1753 }
1754
1755 kern_return_t
memory_object_pages_resident(memory_object_control_t control,boolean_t * has_pages_resident)1756 memory_object_pages_resident(
1757 memory_object_control_t control,
1758 boolean_t * has_pages_resident)
1759 {
1760 vm_object_t object;
1761
1762 *has_pages_resident = FALSE;
1763
1764 object = memory_object_control_to_vm_object(control);
1765 if (object == VM_OBJECT_NULL) {
1766 return KERN_INVALID_ARGUMENT;
1767 }
1768
1769 if (object->resident_page_count) {
1770 *has_pages_resident = TRUE;
1771 }
1772
1773 return KERN_SUCCESS;
1774 }
1775
1776 kern_return_t
memory_object_signed(memory_object_control_t control,boolean_t is_signed)1777 memory_object_signed(
1778 memory_object_control_t control,
1779 boolean_t is_signed)
1780 {
1781 vm_object_t object;
1782
1783 object = memory_object_control_to_vm_object(control);
1784 if (object == VM_OBJECT_NULL) {
1785 return KERN_INVALID_ARGUMENT;
1786 }
1787
1788 vm_object_lock(object);
1789 object->code_signed = is_signed;
1790 vm_object_unlock(object);
1791
1792 return KERN_SUCCESS;
1793 }
1794
1795 boolean_t
memory_object_is_signed(memory_object_control_t control)1796 memory_object_is_signed(
1797 memory_object_control_t control)
1798 {
1799 boolean_t is_signed;
1800 vm_object_t object;
1801
1802 object = memory_object_control_to_vm_object(control);
1803 if (object == VM_OBJECT_NULL) {
1804 return FALSE;
1805 }
1806
1807 vm_object_lock_shared(object);
1808 is_signed = object->code_signed;
1809 vm_object_unlock(object);
1810
1811 return is_signed;
1812 }
1813
1814 boolean_t
memory_object_is_shared_cache(memory_object_control_t control)1815 memory_object_is_shared_cache(
1816 memory_object_control_t control)
1817 {
1818 vm_object_t object = VM_OBJECT_NULL;
1819
1820 object = memory_object_control_to_vm_object(control);
1821 if (object == VM_OBJECT_NULL) {
1822 return FALSE;
1823 }
1824
1825 return object->object_is_shared_cache;
1826 }
1827
1828 __private_extern__ memory_object_control_t
memory_object_control_allocate(vm_object_t object)1829 memory_object_control_allocate(
1830 vm_object_t object)
1831 {
1832 return object;
1833 }
1834
1835 __private_extern__ void
memory_object_control_collapse(memory_object_control_t * control,vm_object_t object)1836 memory_object_control_collapse(
1837 memory_object_control_t *control,
1838 vm_object_t object)
1839 {
1840 *control = object;
1841 }
1842
1843 __private_extern__ vm_object_t
memory_object_control_to_vm_object(memory_object_control_t control)1844 memory_object_control_to_vm_object(
1845 memory_object_control_t control)
1846 {
1847 return control;
1848 }
1849
1850 __private_extern__ vm_object_t
memory_object_to_vm_object(memory_object_t mem_obj)1851 memory_object_to_vm_object(
1852 memory_object_t mem_obj)
1853 {
1854 memory_object_control_t mo_control;
1855
1856 if (mem_obj == MEMORY_OBJECT_NULL) {
1857 return VM_OBJECT_NULL;
1858 }
1859 mo_control = mem_obj->mo_control;
1860 if (mo_control == NULL) {
1861 return VM_OBJECT_NULL;
1862 }
1863 return memory_object_control_to_vm_object(mo_control);
1864 }
1865
1866 void
memory_object_control_reference(__unused memory_object_control_t control)1867 memory_object_control_reference(
1868 __unused memory_object_control_t control)
1869 {
1870 return;
1871 }
1872
1873 /*
 * We only ever issue one of these references, so kill it
1875 * when that gets released (should switch the real reference
1876 * counting in true port-less EMMI).
1877 */
1878 void
memory_object_control_deallocate(__unused memory_object_control_t control)1879 memory_object_control_deallocate(
1880 __unused memory_object_control_t control)
1881 {
1882 }
1883
1884 void
memory_object_control_disable(memory_object_control_t * control)1885 memory_object_control_disable(
1886 memory_object_control_t *control)
1887 {
1888 assert(*control != VM_OBJECT_NULL);
1889 *control = VM_OBJECT_NULL;
1890 }
1891
1892 memory_object_t
convert_port_to_memory_object(__unused mach_port_t port)1893 convert_port_to_memory_object(
1894 __unused mach_port_t port)
1895 {
1896 return MEMORY_OBJECT_NULL;
1897 }
1898
1899
1900 mach_port_t
convert_memory_object_to_port(__unused memory_object_t object)1901 convert_memory_object_to_port(
1902 __unused memory_object_t object)
1903 {
1904 return MACH_PORT_NULL;
1905 }
1906
1907
1908 /* Routine memory_object_reference */
1909 void
memory_object_reference(memory_object_t memory_object)1910 memory_object_reference(
1911 memory_object_t memory_object)
1912 {
1913 (memory_object->mo_pager_ops->memory_object_reference)(
1914 memory_object);
1915 }
1916
1917 /* Routine memory_object_deallocate */
1918 void
memory_object_deallocate(memory_object_t memory_object)1919 memory_object_deallocate(
1920 memory_object_t memory_object)
1921 {
1922 (memory_object->mo_pager_ops->memory_object_deallocate)(
1923 memory_object);
1924 }
1925
1926
1927 /* Routine memory_object_init */
1928 kern_return_t
memory_object_init(memory_object_t memory_object,memory_object_control_t memory_control,memory_object_cluster_size_t memory_object_page_size)1929 memory_object_init
1930 (
1931 memory_object_t memory_object,
1932 memory_object_control_t memory_control,
1933 memory_object_cluster_size_t memory_object_page_size
1934 )
1935 {
1936 return (memory_object->mo_pager_ops->memory_object_init)(
1937 memory_object,
1938 memory_control,
1939 memory_object_page_size);
1940 }
1941
1942 /* Routine memory_object_terminate */
1943 kern_return_t
memory_object_terminate(memory_object_t memory_object)1944 memory_object_terminate
1945 (
1946 memory_object_t memory_object
1947 )
1948 {
1949 return (memory_object->mo_pager_ops->memory_object_terminate)(
1950 memory_object);
1951 }
1952
1953 /* Routine memory_object_data_request */
1954 kern_return_t
memory_object_data_request(memory_object_t memory_object,memory_object_offset_t offset,memory_object_cluster_size_t length,vm_prot_t desired_access,memory_object_fault_info_t fault_info)1955 memory_object_data_request
1956 (
1957 memory_object_t memory_object,
1958 memory_object_offset_t offset,
1959 memory_object_cluster_size_t length,
1960 vm_prot_t desired_access,
1961 memory_object_fault_info_t fault_info
1962 )
1963 {
1964 return (memory_object->mo_pager_ops->memory_object_data_request)(
1965 memory_object,
1966 offset,
1967 length,
1968 desired_access,
1969 fault_info);
1970 }
1971
1972 /* Routine memory_object_data_return */
1973 kern_return_t
memory_object_data_return(memory_object_t memory_object,memory_object_offset_t offset,memory_object_cluster_size_t size,memory_object_offset_t * resid_offset,int * io_error,boolean_t dirty,boolean_t kernel_copy,int upl_flags)1974 memory_object_data_return
1975 (
1976 memory_object_t memory_object,
1977 memory_object_offset_t offset,
1978 memory_object_cluster_size_t size,
1979 memory_object_offset_t *resid_offset,
1980 int *io_error,
1981 boolean_t dirty,
1982 boolean_t kernel_copy,
1983 int upl_flags
1984 )
1985 {
1986 return (memory_object->mo_pager_ops->memory_object_data_return)(
1987 memory_object,
1988 offset,
1989 size,
1990 resid_offset,
1991 io_error,
1992 dirty,
1993 kernel_copy,
1994 upl_flags);
1995 }
1996
1997 /* Routine memory_object_data_initialize */
1998 kern_return_t
memory_object_data_initialize(memory_object_t memory_object,memory_object_offset_t offset,memory_object_cluster_size_t size)1999 memory_object_data_initialize
2000 (
2001 memory_object_t memory_object,
2002 memory_object_offset_t offset,
2003 memory_object_cluster_size_t size
2004 )
2005 {
2006 return (memory_object->mo_pager_ops->memory_object_data_initialize)(
2007 memory_object,
2008 offset,
2009 size);
2010 }
2011
2012 /*
2013 * memory_object_map() is called by VM (in vm_map_enter() and its variants)
2014 * each time a "named" VM object gets mapped directly or indirectly
2015 * (copy-on-write mapping). A "named" VM object has an extra reference held
2016 * by the pager to keep it alive until the pager decides that the
2017 * memory object (and its VM object) can be reclaimed.
2018 * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
2019 * the mappings of that memory object have been removed.
2020 *
2021 * For a given VM object, calls to memory_object_map() and memory_object_unmap()
2022 * are serialized (through object->mapping_in_progress), to ensure that the
2023 * pager gets a consistent view of the mapping status of the memory object.
2024 *
2025 * This allows the pager to keep track of how many times a memory object
2026 * has been mapped and with which protections, to decide when it can be
2027 * reclaimed.
2028 */
2029
2030 /* Routine memory_object_map */
2031 kern_return_t
memory_object_map(memory_object_t memory_object,vm_prot_t prot)2032 memory_object_map
2033 (
2034 memory_object_t memory_object,
2035 vm_prot_t prot
2036 )
2037 {
2038 return (memory_object->mo_pager_ops->memory_object_map)(
2039 memory_object,
2040 prot);
2041 }
2042
2043 /* Routine memory_object_last_unmap */
2044 kern_return_t
memory_object_last_unmap(memory_object_t memory_object)2045 memory_object_last_unmap
2046 (
2047 memory_object_t memory_object
2048 )
2049 {
2050 return (memory_object->mo_pager_ops->memory_object_last_unmap)(
2051 memory_object);
2052 }
2053
2054 boolean_t
memory_object_backing_object(memory_object_t memory_object,memory_object_offset_t offset,vm_object_t * backing_object,vm_object_offset_t * backing_offset)2055 memory_object_backing_object
2056 (
2057 memory_object_t memory_object,
2058 memory_object_offset_t offset,
2059 vm_object_t *backing_object,
2060 vm_object_offset_t *backing_offset)
2061 {
2062 if (memory_object->mo_pager_ops->memory_object_backing_object == NULL) {
2063 return FALSE;
2064 }
2065 return (memory_object->mo_pager_ops->memory_object_backing_object)(
2066 memory_object,
2067 offset,
2068 backing_object,
2069 backing_offset);
2070 }
2071
2072 upl_t
convert_port_to_upl(__unused ipc_port_t port)2073 convert_port_to_upl(
2074 __unused ipc_port_t port)
2075 {
2076 return NULL;
2077 }
2078
2079 mach_port_t
convert_upl_to_port(__unused upl_t upl)2080 convert_upl_to_port(
2081 __unused upl_t upl)
2082 {
2083 return MACH_PORT_NULL;
2084 }
2085