/*
 * Copyright (c) 2008-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/upl.h>

#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/queue.h>

#include <vm/memory_object_internal.h>
#include <vm/vm_kern_xnu.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_protos.h>
#include <vm/vm_ubc.h>


/*
 * APPLE SWAPFILE MEMORY PAGER
 *
 * This external memory manager (EMM) handles mappings of the swap files.
 * Swap files are not regular files and are used solely to store contents of
 * anonymous memory mappings while not resident in memory.
 * There's no valid reason to map a swap file.  This just puts extra burden
 * on the system, is potentially a security issue and is not reliable since
 * the contents can change at any time with pageout operations.
 * Here are some of the issues with mapping a swap file.
 * * PERFORMANCE:
 *   Each page in the swap file belongs to an anonymous memory object.
 *   Mapping the swap file makes those pages also accessible via a vnode
 *   memory object and each page can now be resident twice.
 * * SECURITY:
 *   Mapping a swap file allows access to other processes' memory.  Swap files
 *   are only accessible by the "root" super-user, who can already access any
 *   process's memory, so this is not a real issue, but if the permissions on
 *   the swap file were changed, it could become one.
 *   Swap files are not "zero-filled" on creation, so until their contents are
 *   overwritten with pageout operations, they still contain whatever was on
 *   the disk blocks they were allocated.  The "super-user" could see the
 *   contents of free blocks anyway, so this is not a new security issue, but
 *   it may be perceived as one.
 *
 * We can't legitimately prevent a user process with appropriate privileges
 * from mapping a swap file, but we can prevent it from accessing its actual
 * contents.
 * This pager mostly handles page-in requests (from memory_object_data_request())
 * for swap file mappings and just returns bogus data.
 * Pageouts are not handled, so mmap() has to make sure it does not allow
 * writable (i.e. MAP_SHARED and PROT_WRITE) mappings of swap files.
 */
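
/*
 * Illustrative sketch (not part of this file): what a privileged user-space
 * process would observe when mapping a swap file.  The swap file path below
 * is an assumption for illustration; the observed bytes follow from
 * swapfile_pager_data_request() below, which zero-fills each page and ends
 * it with a '\n'.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t pgsz = (size_t)getpagesize();
 *		// hypothetical swap file path; opening it requires root
 *		int fd = open("/private/var/vm/swapfile0", O_RDONLY);
 *		if (fd < 0) {
 *			return 1;
 *		}
 *		// read-only mapping: writable MAP_SHARED mappings are refused
 *		char *p = mmap(NULL, pgsz, PROT_READ, MAP_SHARED, fd, 0);
 *		if (p == MAP_FAILED) {
 *			return 1;
 *		}
 *		// pages read back as zeroes with a trailing '\n', never the
 *		// swapped-out contents of other processes' memory
 *		printf("first byte: %d, last byte: %d\n", p[0], p[pgsz - 1]);
 *		munmap(p, pgsz);
 *		close(fd);
 *		return 0;
 *	}
 */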

/* forward declarations */
void swapfile_pager_reference(memory_object_t mem_obj);
void swapfile_pager_deallocate(memory_object_t mem_obj);
kern_return_t swapfile_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
    memory_object_cluster_size_t pg_size);
kern_return_t swapfile_pager_terminate(memory_object_t mem_obj);
kern_return_t swapfile_pager_data_request(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t protection_required,
    memory_object_fault_info_t fault_info);
kern_return_t swapfile_pager_data_return(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt,
    memory_object_offset_t *resid_offset,
    int *io_error,
    boolean_t dirty,
    boolean_t kernel_copy,
    int upl_flags);
kern_return_t swapfile_pager_data_initialize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt);
kern_return_t swapfile_pager_map(memory_object_t mem_obj,
    vm_prot_t prot);
kern_return_t swapfile_pager_last_unmap(memory_object_t mem_obj);

/*
 * Vector of VM operations for this EMM.
 * These routines are invoked by VM via the memory_object_*() interfaces.
 */
const struct memory_object_pager_ops swapfile_pager_ops = {
	.memory_object_reference = swapfile_pager_reference,
	.memory_object_deallocate = swapfile_pager_deallocate,
	.memory_object_init = swapfile_pager_init,
	.memory_object_terminate = swapfile_pager_terminate,
	.memory_object_data_request = swapfile_pager_data_request,
	.memory_object_data_return = swapfile_pager_data_return,
	.memory_object_data_initialize = swapfile_pager_data_initialize,
	.memory_object_map = swapfile_pager_map,
	.memory_object_last_unmap = swapfile_pager_last_unmap,
	.memory_object_backing_object = NULL,
	.memory_object_pager_name = "swapfile pager"
};
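
/*
 * Dispatch sketch (informal; the wrapper shown is an assumed, simplified
 * shape of the generic memory_object_*() entry points, not their actual
 * definitions):
 *
 *	kern_return_t
 *	memory_object_data_request(memory_object_t mo, ...)
 *	{
 *		// VM never calls this pager directly; it indirects
 *		// through the ops vector installed above
 *		return mo->mo_pager_ops->memory_object_data_request(mo, ...);
 *	}
 */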

/*
 * The "swapfile_pager" describes a memory object backed by
 * the "swapfile" EMM.
 */
typedef struct swapfile_pager {
	/* mandatory generic header */
	struct memory_object    swp_pgr_hdr;

	/* pager-specific data */
	queue_chain_t           pager_queue;    /* next & prev pagers */
#if MEMORY_OBJECT_HAS_REFCOUNT
#define swp_pgr_hdr_ref         swp_pgr_hdr.mo_ref
#else
	os_ref_atomic_t         swp_pgr_hdr_ref;      /* reference count */
#endif
	bool                    is_ready;       /* is this pager ready ? */
	bool                    is_mapped;      /* is this pager mapped ? */
	struct vnode            *swapfile_vnode;/* the swapfile's vnode */
} *swapfile_pager_t;
#define SWAPFILE_PAGER_NULL     ((swapfile_pager_t) NULL)

/*
 * List of memory objects managed by this EMM.
 * The list is protected by the "swapfile_pager_lock" lock.
 */
int swapfile_pager_count = 0;           /* number of pagers */
queue_head_t swapfile_pager_queue = QUEUE_HEAD_INITIALIZER(swapfile_pager_queue);
LCK_GRP_DECLARE(swapfile_pager_lck_grp, "swapfile pager");
LCK_MTX_DECLARE(swapfile_pager_lock, &swapfile_pager_lck_grp);

/*
 * Statistics & counters.
 */
int swapfile_pager_count_max = 0;

/* internal prototypes */
swapfile_pager_t swapfile_pager_create(struct vnode *vp);
swapfile_pager_t swapfile_pager_lookup(memory_object_t mem_obj);
void swapfile_pager_dequeue(swapfile_pager_t pager);
void swapfile_pager_deallocate_internal(swapfile_pager_t pager,
    boolean_t locked);
void swapfile_pager_terminate_internal(swapfile_pager_t pager);


#if DEBUG
int swapfile_pagerdebug = 0;
#define PAGER_ALL               0xffffffff
#define PAGER_INIT              0x00000001
#define PAGER_PAGEIN            0x00000002

#define PAGER_DEBUG(LEVEL, A)                                           \
	MACRO_BEGIN                                                     \
	if ((swapfile_pagerdebug & LEVEL) == LEVEL) {                   \
	        printf A;                                               \
	}                                                               \
	MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif


/*
 * swapfile_pager_init()
 *
 * Initializes the memory object and makes it ready to be used and mapped.
 */
kern_return_t
swapfile_pager_init(
	memory_object_t         mem_obj,
	memory_object_control_t control,
#if !DEBUG
	__unused
#endif
	memory_object_cluster_size_t pg_size)
{
	swapfile_pager_t        pager;
	kern_return_t           kr;
	memory_object_attr_info_data_t  attributes;

	PAGER_DEBUG(PAGER_ALL,
	    ("swapfile_pager_init: %p, %p, %x\n",
	    mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	pager = swapfile_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	pager->swp_pgr_hdr.mo_control = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = FALSE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
		control,
		MEMORY_OBJECT_ATTRIBUTE_INFO,
		(memory_object_info_t) &attributes,
		MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS) {
		panic("swapfile_pager_init: "
		    "memory_object_change_attributes() failed");
	}

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_data_return()
 *
 * Handles page-out requests from VM.  This should never happen since
 * the pages provided by this EMM are not supposed to be dirty or dirtied
 * and VM should simply discard the contents and reclaim the pages if it
 * needs to.
 */
kern_return_t
swapfile_pager_data_return(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t data_cnt,
	__unused memory_object_offset_t *resid_offset,
	__unused int                    *io_error,
	__unused boolean_t              dirty,
	__unused boolean_t              kernel_copy,
	__unused int                    upl_flags)
{
	panic("swapfile_pager_data_return: should never get called");
	return KERN_FAILURE;
}

kern_return_t
swapfile_pager_data_initialize(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t data_cnt)
{
	panic("swapfile_pager_data_initialize: should never get called");
	return KERN_FAILURE;
}

/*
 * swapfile_pager_data_request()
 *
 * Handles page-in requests from VM.
 */
kern_return_t
swapfile_pager_data_request(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	memory_object_cluster_size_t length,
#if !DEBUG
	__unused
#endif
	vm_prot_t               protection_required,
	__unused memory_object_fault_info_t mo_fault_info)
{
	swapfile_pager_t        pager;
	memory_object_control_t mo_control;
	upl_t                   upl;
	int                     upl_flags;
	upl_size_t              upl_size;
	upl_page_info_t         *upl_pl = NULL;
	unsigned int            pl_count;
	vm_object_t             dst_object;
	kern_return_t           kr, retval;
	vm_offset_t             kernel_mapping;
	char                    *dst_ptr;
	vm_offset_t             cur_offset;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required));

	kernel_mapping = 0;
	upl = NULL;
	upl_pl = NULL;

	pager = swapfile_pager_lookup(mem_obj);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 1); /* pager is alive and mapped */

	PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));

	/*
	 * Gather in a UPL all the VM pages requested by VM.
	 */
	mo_control = pager->swp_pgr_hdr.mo_control;

	upl_size = length;
	upl_flags =
	    UPL_RET_ONLY_ABSENT |
	    UPL_SET_LITE |
	    UPL_NO_SYNC |
	    UPL_CLEAN_IN_PLACE |        /* triggers UPL_CLEAR_DIRTY */
	    UPL_SET_INTERNAL;
	pl_count = 0;
	kr = memory_object_upl_request(mo_control,
	    offset, upl_size,
	    &upl, NULL, NULL, upl_flags, VM_KERN_MEMORY_OSFMK);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	dst_object = memory_object_control_to_vm_object(mo_control);
	assert(dst_object != VM_OBJECT_NULL);


	/*
	 * Reserve a virtual page in the kernel address space to map each
	 * destination physical page when it's its turn to be processed.
	 */
	kr = kmem_alloc(kernel_map, &kernel_mapping, PAGE_SIZE,
	    KMA_DATA_SHARED | KMA_KOBJECT | KMA_PAGEABLE, VM_KERN_MEMORY_NONE);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	dst_ptr = (char *)kernel_mapping;

	/*
	 * Fill in the contents of the pages requested by VM.
	 */
	upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	pl_count = length / PAGE_SIZE;
	for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
		ppnum_t dst_pnum;

		if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
			/* this page is not in the UPL: skip it */
			continue;
		}

		/*
		 * Establish an explicit pmap mapping of the destination
		 * physical page.
		 * We can't do a regular VM mapping because the VM page
		 * is "busy".
		 */
		dst_pnum = (ppnum_t)
		    upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
		assert(dst_pnum != 0);
		retval = pmap_enter(kernel_pmap,
		    kernel_mapping,
		    dst_pnum,
		    VM_PROT_READ | VM_PROT_WRITE,
		    VM_PROT_NONE,
		    0,
		    TRUE,
		    PMAP_MAPPING_TYPE_INFER);

		assert(retval == KERN_SUCCESS);

		if (retval != KERN_SUCCESS) {
			goto done;
		}

		memset(dst_ptr, '\0', PAGE_SIZE);
		/* add an end-of-line to keep line counters happy */
		dst_ptr[PAGE_SIZE - 1] = '\n';

		/*
		 * Remove the pmap mapping of the destination page
		 * in the kernel.
		 */
		pmap_remove(kernel_pmap,
		    (addr64_t) kernel_mapping,
		    (addr64_t) (kernel_mapping + PAGE_SIZE_64));
	}

	retval = KERN_SUCCESS;
done:
	if (upl != NULL) {
		/* clean up the UPL */

		/*
		 * The pages are currently dirty because we've just been
		 * writing on them, but as far as we're concerned, they're
		 * clean since they contain their "original" contents as
		 * provided by us, the pager.
		 * Tell the UPL to mark them "clean".
		 */
		upl_clear_dirty(upl, TRUE);

		/* abort or commit the UPL */
		if (retval != KERN_SUCCESS) {
			upl_abort(upl, 0);
		} else {
			boolean_t empty;
			assertf(page_aligned(upl->u_offset) && page_aligned(upl->u_size),
			    "upl %p offset 0x%llx size 0x%x",
			    upl, upl->u_offset, upl->u_size);
			upl_commit_range(upl, 0, upl->u_size,
			    UPL_COMMIT_CS_VALIDATED,
			    upl_pl, pl_count, &empty);
		}

		/* and deallocate the UPL */
		upl_deallocate(upl);
		upl = NULL;
	}

	if (kernel_mapping != 0) {
		/* clean up the kernel mapping of the destination pages */
		kmem_free(kernel_map, kernel_mapping, PAGE_SIZE);
		kernel_mapping = 0;
	}

	return retval;
}

/*
 * swapfile_pager_reference()
 *
 * Get a reference on this memory object.
 * For external usage only.  Assumes that the initial reference count is not 0,
 * i.e. one should not "revive" a dead pager this way.
 */
void
swapfile_pager_reference(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
	lck_mtx_unlock(&swapfile_pager_lock);
}


/*
 * swapfile_pager_dequeue:
 *
 * Removes a pager from the list of pagers.
 *
 * The caller must hold "swapfile_pager_lock".
 */
void
swapfile_pager_dequeue(
	swapfile_pager_t pager)
{
	assert(!pager->is_mapped);

	queue_remove(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue);
	pager->pager_queue.next = NULL;
	pager->pager_queue.prev = NULL;

	swapfile_pager_count--;
}

/*
 * swapfile_pager_terminate_internal:
 *
 * Trigger the asynchronous termination of the memory object associated
 * with this pager.
 * When the memory object is terminated, there will be one more call
 * to memory_object_deallocate() (i.e. swapfile_pager_deallocate())
 * to finish the clean up.
 *
 * "swapfile_pager_lock" should not be held by the caller.
 * We don't need the lock because the pager has already been removed from
 * the pagers' list and is now ours exclusively.
 */
void
swapfile_pager_terminate_internal(
	swapfile_pager_t pager)
{
	assert(pager->is_ready);
	assert(!pager->is_mapped);

	if (pager->swapfile_vnode != NULL) {
		pager->swapfile_vnode = NULL;
	}

	/* trigger the destruction of the memory object */
	memory_object_destroy(pager->swp_pgr_hdr.mo_control, VM_OBJECT_DESTROY_PAGER);
}

/*
 * swapfile_pager_deallocate_internal()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 * Can be called with swapfile_pager_lock held or not but always returns
 * with it unlocked.
 */
void
swapfile_pager_deallocate_internal(
	swapfile_pager_t        pager,
	boolean_t               locked)
{
	os_ref_count_t ref_count;

	if (!locked) {
		lck_mtx_lock(&swapfile_pager_lock);
	}

	/* drop a reference on this pager */
	ref_count = os_ref_release_locked_raw(&pager->swp_pgr_hdr_ref, NULL);

	if (ref_count == 1) {
		/*
		 * Only the "named" reference is left, which means that
		 * no one is really holding on to this pager anymore.
		 * Terminate it.
		 */
		swapfile_pager_dequeue(pager);
		/* the pager is all ours: no need for the lock now */
		lck_mtx_unlock(&swapfile_pager_lock);
		swapfile_pager_terminate_internal(pager);
	} else if (ref_count == 0) {
		/*
		 * Dropped the existence reference;  the memory object has
		 * been terminated.  Do some final cleanup and release the
		 * pager structure.
		 */
		lck_mtx_unlock(&swapfile_pager_lock);
		if (pager->swp_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_deallocate(pager->swp_pgr_hdr.mo_control);
			pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
		}
		kfree_type(struct swapfile_pager, pager);
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* there are still plenty of references:  keep going... */
		lck_mtx_unlock(&swapfile_pager_lock);
	}

	/* caution: lock is not held on return... */
}

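/*
 * Informal reference-count lifecycle sketch (the call sequence below is
 * illustrative, not an actual call site; the counts follow from the
 * routines in this file):
 *
 *	pager = swapfile_pager_create(vp);  // ref == 1 (existence reference)
 *	swapfile_pager_map(...);            // ref == 2 (one extra reference
 *	                                    //           covers all mappings)
 *	swapfile_pager_last_unmap(...);     // ref == 1: pager is dequeued and
 *	                                    // its memory object destroyed...
 *	swapfile_pager_deallocate(...);     // ...VM then drops the last
 *	                                    // reference: ref == 0, pager freed
 */
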
/*
 * swapfile_pager_deallocate()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 */
void
swapfile_pager_deallocate(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_deallocate: %p\n", mem_obj));
	pager = swapfile_pager_lookup(mem_obj);
	swapfile_pager_deallocate_internal(pager, FALSE);
}

/*
 * swapfile_pager_terminate()
 *
 * Called by VM when the memory object is terminated; there is no
 * pager-specific cleanup to do here.
 */
kern_return_t
swapfile_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_map()
 *
 * This allows VM to let us, the EMM, know that this memory object
 * is currently mapped one or more times.  This is called by VM each time
 * the memory object gets mapped and we take one extra reference on the
 * memory object to account for all its mappings.
 */
kern_return_t
swapfile_pager_map(
	memory_object_t         mem_obj,
	__unused vm_prot_t      prot)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_map: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0); /* pager is alive */
	if (pager->is_mapped == FALSE) {
		/*
		 * First mapping of this pager:  take an extra reference
		 * that will remain until all the mappings of this pager
		 * are removed.
		 */
		pager->is_mapped = TRUE;
		os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_last_unmap()
 *
 * This is called by VM when this memory object is no longer mapped anywhere.
 */
kern_return_t
swapfile_pager_last_unmap(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL,
	    ("swapfile_pager_last_unmap: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	if (pager->is_mapped) {
		/*
		 * All the mappings are gone, so let go of the one extra
		 * reference that represents all the mappings of this pager.
		 */
		pager->is_mapped = FALSE;
		swapfile_pager_deallocate_internal(pager, TRUE);
		/* caution: deallocate_internal() released the lock ! */
	} else {
		lck_mtx_unlock(&swapfile_pager_lock);
	}

	return KERN_SUCCESS;
}


/*
 * swapfile_pager_lookup()
 *
 * Convert from a memory object to the swapfile_pager that implements it,
 * sanity-checking that the memory object is one of ours.
 */
swapfile_pager_t
swapfile_pager_lookup(
	memory_object_t  mem_obj)
{
	swapfile_pager_t        pager;

	assert(mem_obj->mo_pager_ops == &swapfile_pager_ops);
	__IGNORE_WCASTALIGN(pager = (swapfile_pager_t) mem_obj);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0);
	return pager;
}

swapfile_pager_t
swapfile_pager_create(
	struct vnode            *vp)
{
	swapfile_pager_t        pager, pager2;
	memory_object_control_t control;
	kern_return_t           kr;

	pager = kalloc_type(struct swapfile_pager, Z_WAITOK | Z_NOFAIL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake object type
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	pager->swp_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
	pager->swp_pgr_hdr.mo_pager_ops = &swapfile_pager_ops;
	pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
	pager->swp_pgr_hdr.mo_last_unmap_ctid = 0;

	pager->is_ready = FALSE;        /* not ready until it has a "name" */
	os_ref_init_raw(&pager->swp_pgr_hdr_ref, NULL);   /* setup reference */
	pager->is_mapped = FALSE;
	pager->swapfile_vnode = vp;

	lck_mtx_lock(&swapfile_pager_lock);
	/* see if anyone raced us to create a pager for the same object */
	queue_iterate(&swapfile_pager_queue,
	    pager2,
	    swapfile_pager_t,
	    pager_queue) {
		if (pager2->swapfile_vnode == vp) {
			break;
		}
	}
	if (!queue_end(&swapfile_pager_queue,
	    (queue_entry_t) pager2)) {
		/* while we hold the lock, transfer our setup ref to winner */
		os_ref_retain_locked_raw(&pager2->swp_pgr_hdr_ref, NULL);
		/* we lost the race, down with the loser... */
		lck_mtx_unlock(&swapfile_pager_lock);
		pager->swapfile_vnode = NULL;
		kfree_type(struct swapfile_pager, pager);
		/* ... and go with the winner */
		pager = pager2;
		/* let the winner make sure the pager gets ready */
		return pager;
	}

	/* enter new pager at the head of our list of pagers */
	queue_enter_first(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue);
	swapfile_pager_count++;
	if (swapfile_pager_count > swapfile_pager_count_max) {
		swapfile_pager_count_max = swapfile_pager_count;
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	kr = memory_object_create_named((memory_object_t) pager,
	    0,
	    &control);
	assert(kr == KERN_SUCCESS);

	memory_object_mark_trusted(control);

	lck_mtx_lock(&swapfile_pager_lock);
	/* the new pager is now ready to be used */
	pager->is_ready = TRUE;
	lck_mtx_unlock(&swapfile_pager_lock);

	/* wakeup anyone waiting for this pager to be ready */
	thread_wakeup(&pager->is_ready);

	return pager;
}

/*
 * swapfile_pager_setup()
 *
 * Provide the caller with a memory object backed by the provided
 * swap file vnode ("vp").  If such a memory object already exists,
 * re-use it, otherwise create a new memory object.
 */
memory_object_t
swapfile_pager_setup(
	struct vnode *vp)
{
	swapfile_pager_t        pager;

	lck_mtx_lock(&swapfile_pager_lock);

	queue_iterate(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue) {
		if (pager->swapfile_vnode == vp) {
			break;
		}
	}
	if (queue_end(&swapfile_pager_queue,
	    (queue_entry_t) pager)) {
		/* no existing pager for this backing object */
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* make sure pager doesn't disappear */
		os_ref_retain_raw(&pager->swp_pgr_hdr_ref, NULL);
	}

	lck_mtx_unlock(&swapfile_pager_lock);

	if (pager == SWAPFILE_PAGER_NULL) {
		pager = swapfile_pager_create(vp);
		if (pager == SWAPFILE_PAGER_NULL) {
			return MEMORY_OBJECT_NULL;
		}
	}

	lck_mtx_lock(&swapfile_pager_lock);
	while (!pager->is_ready) {
		lck_mtx_sleep(&swapfile_pager_lock,
		    LCK_SLEEP_DEFAULT,
		    &pager->is_ready,
		    THREAD_UNINT);
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return (memory_object_t) pager;
}
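
/*
 * Hypothetical usage from a mapping path (the surrounding code is an
 * assumption for illustration; the setup/deallocate pairing is what this
 * file expects from its caller):
 *
 *	if (vnode_isswap(vp)) {
 *		// substitute this pager so the mapping only ever sees
 *		// zero-filled pages instead of the swap file's contents
 *		memory_object_t pager = swapfile_pager_setup(vp);
 *		if (pager == MEMORY_OBJECT_NULL) {
 *			return ENOMEM;
 *		}
 *		... map "pager" instead of the vnode's own pager ...
 *		memory_object_deallocate(pager);
 *	}
 */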

memory_object_control_t
swapfile_pager_control(
	memory_object_t mem_obj)
{
	swapfile_pager_t        pager;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &swapfile_pager_ops) {
		return MEMORY_OBJECT_CONTROL_NULL;
	}
	pager = swapfile_pager_lookup(mem_obj);
	return pager->swp_pgr_hdr.mo_control;
}