xref: /xnu-10063.141.1/osfmk/vm/vm_swapfile_pager.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /*
2  * Copyright (c) 2008-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/kern_return.h>
30 #include <mach/memory_object_control.h>
31 #include <mach/upl.h>
32 
33 #include <kern/ipc_kobject.h>
34 #include <kern/kalloc.h>
35 #include <kern/queue.h>
36 
37 #include <vm/memory_object.h>
38 #include <vm/vm_kern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_pageout.h>
41 #include <vm/vm_protos.h>
42 
43 
44 /*
45  * APPLE SWAPFILE MEMORY PAGER
46  *
47  * This external memory manager (EMM) handles mappings of the swap files.
48  * Swap files are not regular files and are used solely to store contents of
49  * anonymous memory mappings while not resident in memory.
50  * There's no valid reason to map a swap file.  This just puts extra burden
51  * on the system, is potentially a security issue and is not reliable since
52  * the contents can change at any time with pageout operations.
53  * Here are some of the issues with mapping a swap file.
54  * * PERFORMANCE:
55  *   Each page in the swap file belongs to an anonymous memory object. Mapping
56  *   the swap file makes those pages also accessible via a vnode memory
57  *   object and each page can now be resident twice.
58  * * SECURITY:
59  *   Mapping a swap file allows access to other processes' memory.  Swap files
60  *   are only accessible by the "root" super-user, who can already access any
61  *   process's memory, so this is not a real issue but if permissions on the
62  *   swap file got changed, it could become one.
63  *   Swap files are not "zero-filled" on creation, so until their contents are
64  *   overwritten with pageout operations, they still contain whatever was on
65  *   the disk blocks they were allocated.  The "super-user" could see the
66  *   contents of free blocks anyway, so this is not a new security issue but
67  *   it may be perceived as one.
68  *
69  * We can't legitimately prevent a user process with appropriate privileges
70  * from mapping a swap file, but we can prevent it from accessing its actual
71  * contents.
72  * This pager mostly handles page-in requests (from memory_object_data_request())
73  * for swap file mappings and just returns bogus data.
74  * Pageouts are not handled, so mmap() has to make sure it does not allow
75  * writable (i.e. MAP_SHARED and PROT_WRITE) mappings of swap files.
76  */
77 
78 /* forward declarations */
79 void swapfile_pager_reference(memory_object_t mem_obj);
80 void swapfile_pager_deallocate(memory_object_t mem_obj);
81 kern_return_t swapfile_pager_init(memory_object_t mem_obj,
82     memory_object_control_t control,
83     memory_object_cluster_size_t pg_size);
84 kern_return_t swapfile_pager_terminate(memory_object_t mem_obj);
85 kern_return_t swapfile_pager_data_request(memory_object_t mem_obj,
86     memory_object_offset_t offset,
87     memory_object_cluster_size_t length,
88     vm_prot_t protection_required,
89     memory_object_fault_info_t fault_info);
90 kern_return_t swapfile_pager_data_return(memory_object_t mem_obj,
91     memory_object_offset_t offset,
92     memory_object_cluster_size_t      data_cnt,
93     memory_object_offset_t *resid_offset,
94     int *io_error,
95     boolean_t dirty,
96     boolean_t kernel_copy,
97     int upl_flags);
98 kern_return_t swapfile_pager_data_initialize(memory_object_t mem_obj,
99     memory_object_offset_t offset,
100     memory_object_cluster_size_t data_cnt);
101 kern_return_t swapfile_pager_map(memory_object_t mem_obj,
102     vm_prot_t prot);
103 kern_return_t swapfile_pager_last_unmap(memory_object_t mem_obj);
104 
105 /*
106  * Vector of VM operations for this EMM.
107  * These routines are invoked by VM via the memory_object_*() interfaces.
108  */
109 const struct memory_object_pager_ops swapfile_pager_ops = {
110 	.memory_object_reference = swapfile_pager_reference,
111 	.memory_object_deallocate = swapfile_pager_deallocate,
112 	.memory_object_init = swapfile_pager_init,
113 	.memory_object_terminate = swapfile_pager_terminate,
114 	.memory_object_data_request = swapfile_pager_data_request,
115 	.memory_object_data_return = swapfile_pager_data_return,
116 	.memory_object_data_initialize = swapfile_pager_data_initialize,
117 	.memory_object_map = swapfile_pager_map,
118 	.memory_object_last_unmap = swapfile_pager_last_unmap,
119 	.memory_object_backing_object = NULL,
120 	.memory_object_pager_name = "swapfile pager"
121 };
122 
123 /*
124  * The "swapfile_pager" describes a memory object backed by
125  * the "swapfile" EMM.
126  */
127 typedef struct swapfile_pager {
128 	/* mandatory generic header */
129 	struct memory_object    swp_pgr_hdr;
130 
131 	/* pager-specific data */
132 	queue_chain_t           pager_queue;    /* next & prev pagers */
133 #if MEMORY_OBJECT_HAS_REFCOUNT
134 #define swp_pgr_hdr_ref         swp_pgr_hdr.mo_ref
135 #else
136 	os_ref_atomic_t         swp_pgr_hdr_ref;      /* reference count */
137 #endif
138 	bool                    is_ready;       /* is this pager ready ? */
139 	bool                    is_mapped;      /* is this pager mapped ? */
140 	struct vnode            *swapfile_vnode;/* the swapfile's vnode */
141 } *swapfile_pager_t;
142 #define SWAPFILE_PAGER_NULL     ((swapfile_pager_t) NULL)
143 
144 /*
145  * List of memory objects managed by this EMM.
146  * The list is protected by the "swapfile_pager_lock" lock.
147  */
148 int swapfile_pager_count = 0;           /* number of pagers */
149 queue_head_t swapfile_pager_queue = QUEUE_HEAD_INITIALIZER(swapfile_pager_queue);
150 LCK_GRP_DECLARE(swapfile_pager_lck_grp, "swapfile pager");
151 LCK_MTX_DECLARE(swapfile_pager_lock, &swapfile_pager_lck_grp);
152 
153 /*
154  * Statistics & counters.
155  */
156 int swapfile_pager_count_max = 0;
157 
158 /* internal prototypes */
159 swapfile_pager_t swapfile_pager_create(struct vnode *vp);
160 swapfile_pager_t swapfile_pager_lookup(memory_object_t mem_obj);
161 void swapfile_pager_dequeue(swapfile_pager_t pager);
162 void swapfile_pager_deallocate_internal(swapfile_pager_t pager,
163     boolean_t locked);
164 void swapfile_pager_terminate_internal(swapfile_pager_t pager);
165 
166 
/*
 * Debug tracing.
 *
 * On DEBUG builds, PAGER_DEBUG(LEVEL, A) prints "A" (a parenthesized
 * printf() argument list) when all the bits of LEVEL are set in
 * "swapfile_pagerdebug".  On other builds it expands to nothing, so
 * neither LEVEL nor A is evaluated.
 */
#if DEBUG
int swapfile_pagerdebug = 0;
#define PAGER_ALL               0xffffffff
#define PAGER_INIT              0x00000001
#define PAGER_PAGEIN            0x00000002

/*
 * LEVEL is parenthesized so that a compound argument such as
 * "PAGER_INIT | PAGER_PAGEIN" is taken as a whole on both sides of
 * the comparison.
 */
#define PAGER_DEBUG(LEVEL, A)                                           \
	MACRO_BEGIN                                                     \
	if ((swapfile_pagerdebug & (LEVEL)) == (LEVEL)) {               \
	        printf A;                                               \
	}                                                               \
	MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
182 
183 
184 /*
185  * swapfile_pager_init()
186  *
187  * Initialize the memory object and makes it ready to be used and mapped.
188  */
189 kern_return_t
swapfile_pager_init(memory_object_t mem_obj,memory_object_control_t control,__unused memory_object_cluster_size_t pg_size)190 swapfile_pager_init(
191 	memory_object_t         mem_obj,
192 	memory_object_control_t control,
193 #if !DEBUG
194 	__unused
195 #endif
196 	memory_object_cluster_size_t pg_size)
197 {
198 	swapfile_pager_t        pager;
199 	kern_return_t           kr;
200 	memory_object_attr_info_data_t  attributes;
201 
202 	PAGER_DEBUG(PAGER_ALL,
203 	    ("swapfile_pager_init: %p, %p, %x\n",
204 	    mem_obj, control, pg_size));
205 
206 	if (control == MEMORY_OBJECT_CONTROL_NULL) {
207 		return KERN_INVALID_ARGUMENT;
208 	}
209 
210 	pager = swapfile_pager_lookup(mem_obj);
211 
212 	memory_object_control_reference(control);
213 
214 	pager->swp_pgr_hdr.mo_control = control;
215 
216 	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
217 	attributes.cluster_size = (1 << (PAGE_SHIFT));
218 	attributes.may_cache_object = FALSE;
219 	attributes.temporary = TRUE;
220 
221 	kr = memory_object_change_attributes(
222 		control,
223 		MEMORY_OBJECT_ATTRIBUTE_INFO,
224 		(memory_object_info_t) &attributes,
225 		MEMORY_OBJECT_ATTR_INFO_COUNT);
226 	if (kr != KERN_SUCCESS) {
227 		panic("swapfile_pager_init: "
228 		    "memory_object_change_attributes() failed");
229 	}
230 
231 	return KERN_SUCCESS;
232 }
233 
234 /*
235  * swapfile_data_return()
236  *
237  * Handles page-out requests from VM.  This should never happen since
238  * the pages provided by this EMM are not supposed to be dirty or dirtied
239  * and VM should simply discard the contents and reclaim the pages if it
240  * needs to.
241  */
242 kern_return_t
swapfile_pager_data_return(__unused memory_object_t mem_obj,__unused memory_object_offset_t offset,__unused memory_object_cluster_size_t data_cnt,__unused memory_object_offset_t * resid_offset,__unused int * io_error,__unused boolean_t dirty,__unused boolean_t kernel_copy,__unused int upl_flags)243 swapfile_pager_data_return(
244 	__unused memory_object_t        mem_obj,
245 	__unused memory_object_offset_t offset,
246 	__unused memory_object_cluster_size_t           data_cnt,
247 	__unused memory_object_offset_t *resid_offset,
248 	__unused int                    *io_error,
249 	__unused boolean_t              dirty,
250 	__unused boolean_t              kernel_copy,
251 	__unused int                    upl_flags)
252 {
253 	panic("swapfile_pager_data_return: should never get called");
254 	return KERN_FAILURE;
255 }
256 
257 kern_return_t
swapfile_pager_data_initialize(__unused memory_object_t mem_obj,__unused memory_object_offset_t offset,__unused memory_object_cluster_size_t data_cnt)258 swapfile_pager_data_initialize(
259 	__unused memory_object_t        mem_obj,
260 	__unused memory_object_offset_t offset,
261 	__unused memory_object_cluster_size_t           data_cnt)
262 {
263 	panic("swapfile_pager_data_initialize: should never get called");
264 	return KERN_FAILURE;
265 }
266 
267 /*
268  * swapfile_pager_data_request()
269  *
270  * Handles page-in requests from VM.
271  */
272 kern_return_t
swapfile_pager_data_request(memory_object_t mem_obj,memory_object_offset_t offset,memory_object_cluster_size_t length,__unused vm_prot_t protection_required,__unused memory_object_fault_info_t mo_fault_info)273 swapfile_pager_data_request(
274 	memory_object_t         mem_obj,
275 	memory_object_offset_t  offset,
276 	memory_object_cluster_size_t            length,
277 #if !DEBUG
278 	__unused
279 #endif
280 	vm_prot_t               protection_required,
281 	__unused memory_object_fault_info_t mo_fault_info)
282 {
283 	swapfile_pager_t        pager;
284 	memory_object_control_t mo_control;
285 	upl_t                   upl;
286 	int                     upl_flags;
287 	upl_size_t              upl_size;
288 	upl_page_info_t         *upl_pl = NULL;
289 	unsigned int            pl_count;
290 	vm_object_t             dst_object;
291 	kern_return_t           kr, retval;
292 	vm_offset_t             kernel_mapping;
293 	char                    *dst_ptr;
294 	vm_offset_t             cur_offset;
295 
296 	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required));
297 
298 	kernel_mapping = 0;
299 	upl = NULL;
300 	upl_pl = NULL;
301 
302 	pager = swapfile_pager_lookup(mem_obj);
303 	assert(pager->is_ready);
304 	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 1); /* pager is alive and mapped */
305 
306 	PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
307 
308 	/*
309 	 * Gather in a UPL all the VM pages requested by VM.
310 	 */
311 	mo_control = pager->swp_pgr_hdr.mo_control;
312 
313 	upl_size = length;
314 	upl_flags =
315 	    UPL_RET_ONLY_ABSENT |
316 	    UPL_SET_LITE |
317 	    UPL_NO_SYNC |
318 	    UPL_CLEAN_IN_PLACE |        /* triggers UPL_CLEAR_DIRTY */
319 	    UPL_SET_INTERNAL;
320 	pl_count = 0;
321 	kr = memory_object_upl_request(mo_control,
322 	    offset, upl_size,
323 	    &upl, NULL, NULL, upl_flags, VM_KERN_MEMORY_OSFMK);
324 	if (kr != KERN_SUCCESS) {
325 		retval = kr;
326 		goto done;
327 	}
328 	dst_object = memory_object_control_to_vm_object(mo_control);
329 	assert(dst_object != VM_OBJECT_NULL);
330 
331 
332 	/*
333 	 * Reserve a virtual page in the kernel address space to map each
334 	 * destination physical page when it's its turn to be processed.
335 	 */
336 	kr = kmem_alloc(kernel_map, &kernel_mapping, PAGE_SIZE,
337 	    KMA_DATA | KMA_KOBJECT | KMA_PAGEABLE, VM_KERN_MEMORY_NONE);
338 	if (kr != KERN_SUCCESS) {
339 		retval = kr;
340 		goto done;
341 	}
342 	dst_ptr = (char *)kernel_mapping;
343 
344 	/*
345 	 * Fill in the contents of the pages requested by VM.
346 	 */
347 	upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
348 	pl_count = length / PAGE_SIZE;
349 	for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
350 		ppnum_t dst_pnum;
351 
352 		if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
353 			/* this page is not in the UPL: skip it */
354 			continue;
355 		}
356 
357 		/*
358 		 * Establish an explicit pmap mapping of the destination
359 		 * physical page.
360 		 * We can't do a regular VM mapping because the VM page
361 		 * is "busy".
362 		 */
363 		dst_pnum = (ppnum_t)
364 		    upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
365 		assert(dst_pnum != 0);
366 		retval = pmap_enter(kernel_pmap,
367 		    kernel_mapping,
368 		    dst_pnum,
369 		    VM_PROT_READ | VM_PROT_WRITE,
370 		    VM_PROT_NONE,
371 		    0,
372 		    TRUE,
373 		    PMAP_MAPPING_TYPE_INFER);
374 
375 		assert(retval == KERN_SUCCESS);
376 
377 		if (retval != KERN_SUCCESS) {
378 			goto done;
379 		}
380 
381 		memset(dst_ptr, '\0', PAGE_SIZE);
382 		/* add an end-of-line to keep line counters happy */
383 		dst_ptr[PAGE_SIZE - 1] = '\n';
384 
385 		/*
386 		 * Remove the pmap mapping of the destination page
387 		 * in the kernel.
388 		 */
389 		pmap_remove(kernel_pmap,
390 		    (addr64_t) kernel_mapping,
391 		    (addr64_t) (kernel_mapping + PAGE_SIZE_64));
392 	}
393 
394 	retval = KERN_SUCCESS;
395 done:
396 	if (upl != NULL) {
397 		/* clean up the UPL */
398 
399 		/*
400 		 * The pages are currently dirty because we've just been
401 		 * writing on them, but as far as we're concerned, they're
402 		 * clean since they contain their "original" contents as
403 		 * provided by us, the pager.
404 		 * Tell the UPL to mark them "clean".
405 		 */
406 		upl_clear_dirty(upl, TRUE);
407 
408 		/* abort or commit the UPL */
409 		if (retval != KERN_SUCCESS) {
410 			upl_abort(upl, 0);
411 		} else {
412 			boolean_t empty;
413 			assertf(page_aligned(upl->u_offset) && page_aligned(upl->u_size),
414 			    "upl %p offset 0x%llx size 0x%x",
415 			    upl, upl->u_offset, upl->u_size);
416 			upl_commit_range(upl, 0, upl->u_size,
417 			    UPL_COMMIT_CS_VALIDATED,
418 			    upl_pl, pl_count, &empty);
419 		}
420 
421 		/* and deallocate the UPL */
422 		upl_deallocate(upl);
423 		upl = NULL;
424 	}
425 
426 	if (kernel_mapping != 0) {
427 		/* clean up the mapping of the source and destination pages */
428 		kmem_free(kernel_map, kernel_mapping, PAGE_SIZE);
429 		kernel_mapping = 0;
430 	}
431 
432 	return retval;
433 }
434 
435 /*
436  * swapfile_pager_reference()
437  *
438  * Get a reference on this memory object.
439  * For external usage only.  Assumes that the initial reference count is not 0,
440  * i.e one should not "revive" a dead pager this way.
441  */
442 void
swapfile_pager_reference(memory_object_t mem_obj)443 swapfile_pager_reference(
444 	memory_object_t         mem_obj)
445 {
446 	swapfile_pager_t        pager;
447 
448 	pager = swapfile_pager_lookup(mem_obj);
449 
450 	lck_mtx_lock(&swapfile_pager_lock);
451 	os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
452 	lck_mtx_unlock(&swapfile_pager_lock);
453 }
454 
455 
456 /*
457  * swapfile_pager_dequeue:
458  *
459  * Removes a pager from the list of pagers.
460  *
461  * The caller must hold "swapfile_pager_lock".
462  */
463 void
swapfile_pager_dequeue(swapfile_pager_t pager)464 swapfile_pager_dequeue(
465 	swapfile_pager_t pager)
466 {
467 	assert(!pager->is_mapped);
468 
469 	queue_remove(&swapfile_pager_queue,
470 	    pager,
471 	    swapfile_pager_t,
472 	    pager_queue);
473 	pager->pager_queue.next = NULL;
474 	pager->pager_queue.prev = NULL;
475 
476 	swapfile_pager_count--;
477 }
478 
479 /*
480  * swapfile_pager_terminate_internal:
481  *
482  * Trigger the asynchronous termination of the memory object associated
483  * with this pager.
484  * When the memory object is terminated, there will be one more call
485  * to memory_object_deallocate() (i.e. swapfile_pager_deallocate())
486  * to finish the clean up.
487  *
488  * "swapfile_pager_lock" should not be held by the caller.
489  * We don't need the lock because the pager has already been removed from
490  * the pagers' list and is now ours exclusively.
491  */
492 void
swapfile_pager_terminate_internal(swapfile_pager_t pager)493 swapfile_pager_terminate_internal(
494 	swapfile_pager_t pager)
495 {
496 	assert(pager->is_ready);
497 	assert(!pager->is_mapped);
498 
499 	if (pager->swapfile_vnode != NULL) {
500 		pager->swapfile_vnode = NULL;
501 	}
502 
503 	/* trigger the destruction of the memory object */
504 	memory_object_destroy(pager->swp_pgr_hdr.mo_control, VM_OBJECT_DESTROY_UNKNOWN_REASON);
505 }
506 
507 /*
508  * swapfile_pager_deallocate_internal()
509  *
510  * Release a reference on this pager and free it when the last
511  * reference goes away.
512  * Can be called with swapfile_pager_lock held or not but always returns
513  * with it unlocked.
514  */
515 void
swapfile_pager_deallocate_internal(swapfile_pager_t pager,boolean_t locked)516 swapfile_pager_deallocate_internal(
517 	swapfile_pager_t        pager,
518 	boolean_t               locked)
519 {
520 	os_ref_count_t ref_count;
521 
522 	if (!locked) {
523 		lck_mtx_lock(&swapfile_pager_lock);
524 	}
525 
526 	/* drop a reference on this pager */
527 	ref_count = os_ref_release_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
528 
529 	if (ref_count == 1) {
530 		/*
531 		 * Only the "named" reference is left, which means that
532 		 * no one is really holding on to this pager anymore.
533 		 * Terminate it.
534 		 */
535 		swapfile_pager_dequeue(pager);
536 		/* the pager is all ours: no need for the lock now */
537 		lck_mtx_unlock(&swapfile_pager_lock);
538 		swapfile_pager_terminate_internal(pager);
539 	} else if (ref_count == 0) {
540 		/*
541 		 * Dropped the existence reference;  the memory object has
542 		 * been terminated.  Do some final cleanup and release the
543 		 * pager structure.
544 		 */
545 		lck_mtx_unlock(&swapfile_pager_lock);
546 		if (pager->swp_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL) {
547 			memory_object_control_deallocate(pager->swp_pgr_hdr.mo_control);
548 			pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
549 		}
550 		kfree_type(struct swapfile_pager, pager);
551 		pager = SWAPFILE_PAGER_NULL;
552 	} else {
553 		/* there are still plenty of references:  keep going... */
554 		lck_mtx_unlock(&swapfile_pager_lock);
555 	}
556 
557 	/* caution: lock is not held on return... */
558 }
559 
560 /*
561  * swapfile_pager_deallocate()
562  *
563  * Release a reference on this pager and free it when the last
564  * reference goes away.
565  */
566 void
swapfile_pager_deallocate(memory_object_t mem_obj)567 swapfile_pager_deallocate(
568 	memory_object_t         mem_obj)
569 {
570 	swapfile_pager_t        pager;
571 
572 	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_deallocate: %p\n", mem_obj));
573 	pager = swapfile_pager_lookup(mem_obj);
574 	swapfile_pager_deallocate_internal(pager, FALSE);
575 }
576 
577 /*
578  *
579  */
580 kern_return_t
swapfile_pager_terminate(__unused memory_object_t mem_obj)581 swapfile_pager_terminate(
582 #if !DEBUG
583 	__unused
584 #endif
585 	memory_object_t mem_obj)
586 {
587 	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_terminate: %p\n", mem_obj));
588 
589 	return KERN_SUCCESS;
590 }
591 
592 /*
593  * swapfile_pager_map()
594  *
595  * This allows VM to let us, the EMM, know that this memory object
596  * is currently mapped one or more times.  This is called by VM each time
597  * the memory object gets mapped and we take one extra reference on the
598  * memory object to account for all its mappings.
599  */
600 kern_return_t
swapfile_pager_map(memory_object_t mem_obj,__unused vm_prot_t prot)601 swapfile_pager_map(
602 	memory_object_t         mem_obj,
603 	__unused vm_prot_t      prot)
604 {
605 	swapfile_pager_t        pager;
606 
607 	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_map: %p\n", mem_obj));
608 
609 	pager = swapfile_pager_lookup(mem_obj);
610 
611 	lck_mtx_lock(&swapfile_pager_lock);
612 	assert(pager->is_ready);
613 	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0); /* pager is alive */
614 	if (pager->is_mapped == FALSE) {
615 		/*
616 		 * First mapping of this pager:  take an extra reference
617 		 * that will remain until all the mappings of this pager
618 		 * are removed.
619 		 */
620 		pager->is_mapped = TRUE;
621 		os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
622 	}
623 	lck_mtx_unlock(&swapfile_pager_lock);
624 
625 	return KERN_SUCCESS;
626 }
627 
628 /*
629  * swapfile_pager_last_unmap()
630  *
631  * This is called by VM when this memory object is no longer mapped anywhere.
632  */
633 kern_return_t
swapfile_pager_last_unmap(memory_object_t mem_obj)634 swapfile_pager_last_unmap(
635 	memory_object_t         mem_obj)
636 {
637 	swapfile_pager_t        pager;
638 
639 	PAGER_DEBUG(PAGER_ALL,
640 	    ("swapfile_pager_last_unmap: %p\n", mem_obj));
641 
642 	pager = swapfile_pager_lookup(mem_obj);
643 
644 	lck_mtx_lock(&swapfile_pager_lock);
645 	if (pager->is_mapped) {
646 		/*
647 		 * All the mappings are gone, so let go of the one extra
648 		 * reference that represents all the mappings of this pager.
649 		 */
650 		pager->is_mapped = FALSE;
651 		swapfile_pager_deallocate_internal(pager, TRUE);
652 		/* caution: deallocate_internal() released the lock ! */
653 	} else {
654 		lck_mtx_unlock(&swapfile_pager_lock);
655 	}
656 
657 	return KERN_SUCCESS;
658 }
659 
660 
661 /*
662  *
663  */
664 swapfile_pager_t
swapfile_pager_lookup(memory_object_t mem_obj)665 swapfile_pager_lookup(
666 	memory_object_t  mem_obj)
667 {
668 	swapfile_pager_t        pager;
669 
670 	assert(mem_obj->mo_pager_ops == &swapfile_pager_ops);
671 	__IGNORE_WCASTALIGN(pager = (swapfile_pager_t) mem_obj);
672 	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0);
673 	return pager;
674 }
675 
676 swapfile_pager_t
swapfile_pager_create(struct vnode * vp)677 swapfile_pager_create(
678 	struct vnode            *vp)
679 {
680 	swapfile_pager_t        pager, pager2;
681 	memory_object_control_t control;
682 	kern_return_t           kr;
683 
684 	pager = kalloc_type(struct swapfile_pager, Z_WAITOK | Z_NOFAIL);
685 
686 	/*
687 	 * The vm_map call takes both named entry ports and raw memory
688 	 * objects in the same parameter.  We need to make sure that
689 	 * vm_map does not see this object as a named entry port.  So,
690 	 * we reserve the second word in the object for a fake ip_kotype
691 	 * setting - that will tell vm_map to use it as a memory object.
692 	 */
693 	pager->swp_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
694 	pager->swp_pgr_hdr.mo_pager_ops = &swapfile_pager_ops;
695 	pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
696 
697 	pager->is_ready = FALSE;/* not ready until it has a "name" */
698 	os_ref_init_raw(&pager->swp_pgr_hdr_ref, NULL);   /* setup reference */
699 	pager->is_mapped = FALSE;
700 	pager->swapfile_vnode = vp;
701 
702 	lck_mtx_lock(&swapfile_pager_lock);
703 	/* see if anyone raced us to create a pager for the same object */
704 	queue_iterate(&swapfile_pager_queue,
705 	    pager2,
706 	    swapfile_pager_t,
707 	    pager_queue) {
708 		if (pager2->swapfile_vnode == vp) {
709 			break;
710 		}
711 	}
712 	if (!queue_end(&swapfile_pager_queue,
713 	    (queue_entry_t) pager2)) {
714 		/* while we hold the lock, transfer our setup ref to winner */
715 		os_ref_retain_locked_raw(&pager2->swp_pgr_hdr_ref, NULL);
716 		/* we lost the race, down with the loser... */
717 		lck_mtx_unlock(&swapfile_pager_lock);
718 		pager->swapfile_vnode = NULL;
719 		kfree_type(struct swapfile_pager, pager);
720 		/* ... and go with the winner */
721 		pager = pager2;
722 		/* let the winner make sure the pager gets ready */
723 		return pager;
724 	}
725 
726 	/* enter new pager at the head of our list of pagers */
727 	queue_enter_first(&swapfile_pager_queue,
728 	    pager,
729 	    swapfile_pager_t,
730 	    pager_queue);
731 	swapfile_pager_count++;
732 	if (swapfile_pager_count > swapfile_pager_count_max) {
733 		swapfile_pager_count_max = swapfile_pager_count;
734 	}
735 	lck_mtx_unlock(&swapfile_pager_lock);
736 
737 	kr = memory_object_create_named((memory_object_t) pager,
738 	    0,
739 	    &control);
740 	assert(kr == KERN_SUCCESS);
741 
742 	memory_object_mark_trusted(control);
743 
744 	lck_mtx_lock(&swapfile_pager_lock);
745 	/* the new pager is now ready to be used */
746 	pager->is_ready = TRUE;
747 	lck_mtx_unlock(&swapfile_pager_lock);
748 
749 	/* wakeup anyone waiting for this pager to be ready */
750 	thread_wakeup(&pager->is_ready);
751 
752 	return pager;
753 }
754 
755 /*
756  * swapfile_pager_setup()
757  *
758  * Provide the caller with a memory object backed by the provided
759  * "backing_object" VM object.  If such a memory object already exists,
760  * re-use it, otherwise create a new memory object.
761  */
762 memory_object_t
swapfile_pager_setup(struct vnode * vp)763 swapfile_pager_setup(
764 	struct vnode *vp)
765 {
766 	swapfile_pager_t        pager;
767 
768 	lck_mtx_lock(&swapfile_pager_lock);
769 
770 	queue_iterate(&swapfile_pager_queue,
771 	    pager,
772 	    swapfile_pager_t,
773 	    pager_queue) {
774 		if (pager->swapfile_vnode == vp) {
775 			break;
776 		}
777 	}
778 	if (queue_end(&swapfile_pager_queue,
779 	    (queue_entry_t) pager)) {
780 		/* no existing pager for this backing object */
781 		pager = SWAPFILE_PAGER_NULL;
782 	} else {
783 		/* make sure pager doesn't disappear */
784 		os_ref_retain_raw(&pager->swp_pgr_hdr_ref, NULL);
785 	}
786 
787 	lck_mtx_unlock(&swapfile_pager_lock);
788 
789 	if (pager == SWAPFILE_PAGER_NULL) {
790 		pager = swapfile_pager_create(vp);
791 		if (pager == SWAPFILE_PAGER_NULL) {
792 			return MEMORY_OBJECT_NULL;
793 		}
794 	}
795 
796 	lck_mtx_lock(&swapfile_pager_lock);
797 	while (!pager->is_ready) {
798 		lck_mtx_sleep(&swapfile_pager_lock,
799 		    LCK_SLEEP_DEFAULT,
800 		    &pager->is_ready,
801 		    THREAD_UNINT);
802 	}
803 	lck_mtx_unlock(&swapfile_pager_lock);
804 
805 	return (memory_object_t) pager;
806 }
807 
808 memory_object_control_t
swapfile_pager_control(memory_object_t mem_obj)809 swapfile_pager_control(
810 	memory_object_t mem_obj)
811 {
812 	swapfile_pager_t        pager;
813 
814 	if (mem_obj == MEMORY_OBJECT_NULL ||
815 	    mem_obj->mo_pager_ops != &swapfile_pager_ops) {
816 		return MEMORY_OBJECT_CONTROL_NULL;
817 	}
818 	pager = swapfile_pager_lookup(mem_obj);
819 	return pager->swp_pgr_hdr.mo_control;
820 }
821