/*
 * Copyright (c) 2008-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/upl.h>

#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/queue.h>

#include <vm/memory_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>


/*
 * APPLE SWAPFILE MEMORY PAGER
 *
 * This external memory manager (EMM) handles mappings of the swap files.
 * Swap files are not regular files and are used solely to store contents of
 * anonymous memory mappings while not resident in memory.
 * There's no valid reason to map a swap file.  This just puts extra burden
 * on the system, is potentially a security issue and is not reliable since
 * the contents can change at any time with pageout operations.
 * Here are some of the issues with mapping a swap file.
 * * PERFORMANCE:
 *   Each page in the swap file belongs to an anonymous memory object. Mapping
 *   the swap file makes those pages also accessible via a vnode memory
 *   object and each page can now be resident twice.
 * * SECURITY:
 *   Mapping a swap file allows access to other processes' memory.  Swap files
 *   are only accessible by the "root" super-user, who can already access any
 *   process's memory, so this is not a real issue but if permissions on the
 *   swap file got changed, it could become one.
 *   Swap files are not "zero-filled" on creation, so until their contents are
 *   overwritten with pageout operations, they still contain whatever was on
 *   the disk blocks they were allocated.  The "super-user" could see the
 *   contents of free blocks anyway, so this is not a new security issue but
 *   it may be perceived as one.
 *
 * We can't legitimately prevent a user process with appropriate privileges
 * from mapping a swap file, but we can prevent it from accessing its actual
 * contents.
 * This pager mostly handles page-in requests (from memory_object_data_request())
 * for swap file mappings and just returns bogus data.
 * Pageouts are not handled, so mmap() has to make sure it does not allow
 * writable (i.e. MAP_SHARED and PROT_WRITE) mappings of swap files.
 */
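
/*
 * Illustrative sketch, not part of this pager: the kind of mmap()-side
 * guard the comment above refers to.  The real check lives in the BSD
 * mmap() path; the helper name "vnode_isswap()", the error code and the
 * function below are assumptions for illustration only.
 */
#if 0   /* example only, never compiled */
static int
example_reject_writable_swapfile_mapping(struct vnode *vp, int prot, int flags)
{
	/*
	 * A swap file may be mapped, but never shared+writable: page-outs
	 * are not handled by the swapfile pager, so a writable mapping
	 * could never be paged back out correctly.
	 */
	if (vnode_isswap(vp) &&
	    (flags & MAP_SHARED) &&
	    (prot & PROT_WRITE)) {
		return EPERM;
	}
	return 0;
}
#endif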

/* forward declarations */
void swapfile_pager_reference(memory_object_t mem_obj);
void swapfile_pager_deallocate(memory_object_t mem_obj);
kern_return_t swapfile_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
    memory_object_cluster_size_t pg_size);
kern_return_t swapfile_pager_terminate(memory_object_t mem_obj);
kern_return_t swapfile_pager_data_request(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t protection_required,
    memory_object_fault_info_t fault_info);
kern_return_t swapfile_pager_data_return(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t      data_cnt,
    memory_object_offset_t *resid_offset,
    int *io_error,
    boolean_t dirty,
    boolean_t kernel_copy,
    int upl_flags);
kern_return_t swapfile_pager_data_initialize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt);
kern_return_t swapfile_pager_data_unlock(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_size_t size,
    vm_prot_t desired_access);
kern_return_t swapfile_pager_synchronize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_size_t length,
    vm_sync_t sync_flags);
kern_return_t swapfile_pager_map(memory_object_t mem_obj,
    vm_prot_t prot);
kern_return_t swapfile_pager_last_unmap(memory_object_t mem_obj);

/*
 * Vector of VM operations for this EMM.
 * These routines are invoked by VM via the memory_object_*() interfaces.
 */
const struct memory_object_pager_ops swapfile_pager_ops = {
	.memory_object_reference = swapfile_pager_reference,
	.memory_object_deallocate = swapfile_pager_deallocate,
	.memory_object_init = swapfile_pager_init,
	.memory_object_terminate = swapfile_pager_terminate,
	.memory_object_data_request = swapfile_pager_data_request,
	.memory_object_data_return = swapfile_pager_data_return,
	.memory_object_data_initialize = swapfile_pager_data_initialize,
	.memory_object_data_unlock = swapfile_pager_data_unlock,
	.memory_object_synchronize = swapfile_pager_synchronize,
	.memory_object_map = swapfile_pager_map,
	.memory_object_last_unmap = swapfile_pager_last_unmap,
	.memory_object_data_reclaim = NULL,
	.memory_object_backing_object = NULL,
	.memory_object_pager_name = "swapfile pager"
};

/*
 * The "swapfile_pager" describes a memory object backed by
 * the "swapfile" EMM.
 */
typedef struct swapfile_pager {
	/* mandatory generic header */
	struct memory_object    swp_pgr_hdr;

	/* pager-specific data */
	queue_chain_t           pager_queue;    /* next & prev pagers */
#if MEMORY_OBJECT_HAS_REFCOUNT
#define swp_pgr_hdr_ref         swp_pgr_hdr.mo_ref
#else
	os_ref_atomic_t         swp_pgr_hdr_ref;      /* reference count */
#endif
	bool                    is_ready;       /* is this pager ready ? */
	bool                    is_mapped;      /* is this pager mapped ? */
	struct vnode            *swapfile_vnode;/* the swapfile's vnode */
} *swapfile_pager_t;
#define SWAPFILE_PAGER_NULL     ((swapfile_pager_t) NULL)

/*
 * List of memory objects managed by this EMM.
 * The list is protected by the "swapfile_pager_lock" lock.
 */
int swapfile_pager_count = 0;           /* number of pagers */
queue_head_t swapfile_pager_queue = QUEUE_HEAD_INITIALIZER(swapfile_pager_queue);
LCK_GRP_DECLARE(swapfile_pager_lck_grp, "swapfile pager");
LCK_MTX_DECLARE(swapfile_pager_lock, &swapfile_pager_lck_grp);

/*
 * Statistics & counters.
 */
int swapfile_pager_count_max = 0;

/* internal prototypes */
swapfile_pager_t swapfile_pager_create(struct vnode *vp);
swapfile_pager_t swapfile_pager_lookup(memory_object_t mem_obj);
void swapfile_pager_dequeue(swapfile_pager_t pager);
void swapfile_pager_deallocate_internal(swapfile_pager_t pager,
    boolean_t locked);
void swapfile_pager_terminate_internal(swapfile_pager_t pager);


#if DEBUG
int swapfile_pagerdebug = 0;
#define PAGER_ALL               0xffffffff
#define PAGER_INIT              0x00000001
#define PAGER_PAGEIN            0x00000002

#define PAGER_DEBUG(LEVEL, A)                                           \
	MACRO_BEGIN                                                     \
	if ((swapfile_pagerdebug & LEVEL)==LEVEL) {             \
	        printf A;                                               \
	}                                                               \
	MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif


/*
 * swapfile_pager_init()
 *
 * Initialize the memory object and make it ready to be used and mapped.
 */
kern_return_t
swapfile_pager_init(
	memory_object_t         mem_obj,
	memory_object_control_t control,
#if !DEBUG
	__unused
#endif
	memory_object_cluster_size_t pg_size)
{
	swapfile_pager_t        pager;
	kern_return_t           kr;
	memory_object_attr_info_data_t  attributes;

	PAGER_DEBUG(PAGER_ALL,
	    ("swapfile_pager_init: %p, %p, %x\n",
	    mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	pager = swapfile_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	pager->swp_pgr_hdr.mo_control = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = FALSE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
		control,
		MEMORY_OBJECT_ATTRIBUTE_INFO,
		(memory_object_info_t) &attributes,
		MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS) {
		panic("swapfile_pager_init: "
		    "memory_object_change_attributes() failed");
	}

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_data_return()
 *
 * Handles page-out requests from VM.  This should never happen since
 * the pages provided by this EMM are not supposed to be dirty or dirtied
 * and VM should simply discard the contents and reclaim the pages if it
 * needs to.
 */
kern_return_t
swapfile_pager_data_return(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t           data_cnt,
	__unused memory_object_offset_t *resid_offset,
	__unused int                    *io_error,
	__unused boolean_t              dirty,
	__unused boolean_t              kernel_copy,
	__unused int                    upl_flags)
{
	panic("swapfile_pager_data_return: should never get called");
	return KERN_FAILURE;
}

kern_return_t
swapfile_pager_data_initialize(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t           data_cnt)
{
	panic("swapfile_pager_data_initialize: should never get called");
	return KERN_FAILURE;
}

kern_return_t
swapfile_pager_data_unlock(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_size_t           size,
	__unused vm_prot_t              desired_access)
{
	return KERN_FAILURE;
}

/*
 * swapfile_pager_data_request()
 *
 * Handles page-in requests from VM.
 */
kern_return_t
swapfile_pager_data_request(
	memory_object_t         mem_obj,
	memory_object_offset_t  offset,
	memory_object_cluster_size_t            length,
#if !DEBUG
	__unused
#endif
	vm_prot_t               protection_required,
	__unused memory_object_fault_info_t mo_fault_info)
{
	swapfile_pager_t        pager;
	memory_object_control_t mo_control;
	upl_t                   upl;
	int                     upl_flags;
	upl_size_t              upl_size;
	upl_page_info_t         *upl_pl = NULL;
	unsigned int            pl_count;
	vm_object_t             dst_object;
	kern_return_t           kr, retval;
	vm_map_offset_t         kernel_mapping;
	vm_offset_t             dst_vaddr;
	char                    *dst_ptr;
	vm_offset_t             cur_offset;
	vm_map_entry_t          map_entry;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required));

	kernel_mapping = 0;
	upl = NULL;
	upl_pl = NULL;

	pager = swapfile_pager_lookup(mem_obj);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 1); /* pager is alive and mapped */

	PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));

	/*
	 * Gather in a UPL all the VM pages requested by VM.
	 */
	mo_control = pager->swp_pgr_hdr.mo_control;

	upl_size = length;
	upl_flags =
	    UPL_RET_ONLY_ABSENT |
	    UPL_SET_LITE |
	    UPL_NO_SYNC |
	    UPL_CLEAN_IN_PLACE |        /* triggers UPL_CLEAR_DIRTY */
	    UPL_SET_INTERNAL;
	pl_count = 0;
	kr = memory_object_upl_request(mo_control,
	    offset, upl_size,
	    &upl, NULL, NULL, upl_flags, VM_KERN_MEMORY_OSFMK);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	dst_object = memory_object_control_to_vm_object(mo_control);
	assert(dst_object != VM_OBJECT_NULL);


	/*
	 * Reserve a virtual page in the kernel address space to map each
	 * destination physical page when it's its turn to be processed.
	 */
	vm_object_reference(kernel_object);     /* ref. for mapping */
	kr = vm_map_find_space(kernel_map,
	    &kernel_mapping,
	    PAGE_SIZE_64,
	    0,
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,
	    &map_entry);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(kernel_object);
		retval = kr;
		goto done;
	}
	VME_OBJECT_SET(map_entry, kernel_object);
	VME_OFFSET_SET(map_entry, kernel_mapping - VM_MIN_KERNEL_ADDRESS);
	vm_map_unlock(kernel_map);
	dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
	dst_ptr = (char *) dst_vaddr;

	/*
	 * Fill in the contents of the pages requested by VM.
	 */
	upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	pl_count = length / PAGE_SIZE;
	for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
		ppnum_t dst_pnum;

		if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
			/* this page is not in the UPL: skip it */
			continue;
		}

		/*
		 * Establish an explicit pmap mapping of the destination
		 * physical page.
		 * We can't do a regular VM mapping because the VM page
		 * is "busy".
		 */
		dst_pnum = (ppnum_t)
		    upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
		assert(dst_pnum != 0);
		retval = pmap_enter(kernel_pmap,
		    kernel_mapping,
		    dst_pnum,
		    VM_PROT_READ | VM_PROT_WRITE,
		    VM_PROT_NONE,
		    0,
		    TRUE);

		assert(retval == KERN_SUCCESS);

		if (retval != KERN_SUCCESS) {
			goto done;
		}

		memset(dst_ptr, '\0', PAGE_SIZE);
		/* add an end-of-line to keep line counters happy */
		dst_ptr[PAGE_SIZE - 1] = '\n';

		/*
		 * Remove the pmap mapping of the destination page
		 * in the kernel.
		 */
		pmap_remove(kernel_pmap,
		    (addr64_t) kernel_mapping,
		    (addr64_t) (kernel_mapping + PAGE_SIZE_64));
	}

	retval = KERN_SUCCESS;
done:
	if (upl != NULL) {
		/* clean up the UPL */

		/*
		 * The pages are currently dirty because we've just been
		 * writing on them, but as far as we're concerned, they're
		 * clean since they contain their "original" contents as
		 * provided by us, the pager.
		 * Tell the UPL to mark them "clean".
		 */
		upl_clear_dirty(upl, TRUE);

		/* abort or commit the UPL */
		if (retval != KERN_SUCCESS) {
			upl_abort(upl, 0);
		} else {
			boolean_t empty;
			assertf(page_aligned(upl->u_offset) && page_aligned(upl->u_size),
			    "upl %p offset 0x%llx size 0x%x",
			    upl, upl->u_offset, upl->u_size);
			upl_commit_range(upl, 0, upl->u_size,
			    UPL_COMMIT_CS_VALIDATED,
			    upl_pl, pl_count, &empty);
		}

		/* and deallocate the UPL */
		upl_deallocate(upl);
		upl = NULL;
	}
	if (kernel_mapping != 0) {
		/* clean up the mapping of the source and destination pages */
		kr = vm_map_remove(kernel_map,
		    kernel_mapping,
		    kernel_mapping + PAGE_SIZE_64,
		    VM_MAP_REMOVE_NO_FLAGS);
		assert(kr == KERN_SUCCESS);
		kernel_mapping = 0;
		dst_vaddr = 0;
	}

	return retval;
}
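
/*
 * Illustrative sketch, not part of this pager: what a privileged user-space
 * reader of a mapped swap file observes, given the page-in handler above --
 * every faulted-in page comes back zero-filled except for a trailing '\n'.
 * The swap file path, the 4KB page size and the assumption that the caller
 * runs as "root" with a read-only mapping are examples only.
 */
#if 0   /* example only, never compiled (user-space demo) */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/private/var/vm/swapfile0";  /* assumed path */
	int fd = open(path, O_RDONLY);                    /* requires root */
	if (fd < 0) {
		return 1;
	}
	/* read-only: writable mappings of swap files are refused by mmap() */
	char *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return 1;
	}
	/* the pager supplies bogus data: zeroes, with '\n' as the last byte */
	printf("first byte: %d, last byte: %d\n", p[0], p[4095]);
	munmap(p, 4096);
	close(fd);
	return 0;
}
#endif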

/*
 * swapfile_pager_reference()
 *
 * Get a reference on this memory object.
 * For external usage only.  Assumes that the initial reference count is not 0,
 * i.e. one should not "revive" a dead pager this way.
 */
void
swapfile_pager_reference(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
	lck_mtx_unlock(&swapfile_pager_lock);
}


/*
 * swapfile_pager_dequeue:
 *
 * Removes a pager from the list of pagers.
 *
 * The caller must hold "swapfile_pager_lock".
 */
void
swapfile_pager_dequeue(
	swapfile_pager_t pager)
{
	assert(!pager->is_mapped);

	queue_remove(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue);
	pager->pager_queue.next = NULL;
	pager->pager_queue.prev = NULL;

	swapfile_pager_count--;
}

/*
 * swapfile_pager_terminate_internal:
 *
 * Trigger the asynchronous termination of the memory object associated
 * with this pager.
 * When the memory object is terminated, there will be one more call
 * to memory_object_deallocate() (i.e. swapfile_pager_deallocate())
 * to finish the clean up.
 *
 * "swapfile_pager_lock" should not be held by the caller.
 * We don't need the lock because the pager has already been removed from
 * the pagers' list and is now ours exclusively.
 */
void
swapfile_pager_terminate_internal(
	swapfile_pager_t pager)
{
	assert(pager->is_ready);
	assert(!pager->is_mapped);

	if (pager->swapfile_vnode != NULL) {
		pager->swapfile_vnode = NULL;
	}

	/* trigger the destruction of the memory object */
	memory_object_destroy(pager->swp_pgr_hdr.mo_control, 0);
}

/*
 * swapfile_pager_deallocate_internal()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 * Can be called with swapfile_pager_lock held or not but always returns
 * with it unlocked.
 */
void
swapfile_pager_deallocate_internal(
	swapfile_pager_t        pager,
	boolean_t               locked)
{
	os_ref_count_t ref_count;

	if (!locked) {
		lck_mtx_lock(&swapfile_pager_lock);
	}

	/* drop a reference on this pager */
	ref_count = os_ref_release_locked_raw(&pager->swp_pgr_hdr_ref, NULL);

	if (ref_count == 1) {
		/*
		 * Only the "named" reference is left, which means that
		 * no one is really holding on to this pager anymore.
		 * Terminate it.
		 */
		swapfile_pager_dequeue(pager);
		/* the pager is all ours: no need for the lock now */
		lck_mtx_unlock(&swapfile_pager_lock);
		swapfile_pager_terminate_internal(pager);
	} else if (ref_count == 0) {
		/*
		 * Dropped the existence reference;  the memory object has
		 * been terminated.  Do some final cleanup and release the
		 * pager structure.
		 */
		lck_mtx_unlock(&swapfile_pager_lock);
		if (pager->swp_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_deallocate(pager->swp_pgr_hdr.mo_control);
			pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
		}
		kfree_type(struct swapfile_pager, pager);
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* there are still plenty of references:  keep going... */
		lck_mtx_unlock(&swapfile_pager_lock);
	}

	/* caution: lock is not held on return... */
}

/*
 * swapfile_pager_deallocate()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 */
void
swapfile_pager_deallocate(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_deallocate: %p\n", mem_obj));
	pager = swapfile_pager_lookup(mem_obj);
	swapfile_pager_deallocate_internal(pager, FALSE);
}

/*
 *
 */
kern_return_t
swapfile_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 *
 */
kern_return_t
swapfile_pager_synchronize(
	__unused memory_object_t        mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_size_t   length,
	__unused vm_sync_t              sync_flags)
{
	panic("swapfile_pager_synchronize: memory_object_synchronize no longer supported");
	return KERN_FAILURE;
}

/*
 * swapfile_pager_map()
 *
 * This allows VM to let us, the EMM, know that this memory object
 * is currently mapped one or more times.  This is called by VM each time
 * the memory object gets mapped and we take one extra reference on the
 * memory object to account for all its mappings.
 */
kern_return_t
swapfile_pager_map(
	memory_object_t         mem_obj,
	__unused vm_prot_t      prot)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_map: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0); /* pager is alive */
	if (pager->is_mapped == FALSE) {
		/*
		 * First mapping of this pager:  take an extra reference
		 * that will remain until all the mappings of this pager
		 * are removed.
		 */
		pager->is_mapped = TRUE;
		os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_last_unmap()
 *
 * This is called by VM when this memory object is no longer mapped anywhere.
 */
kern_return_t
swapfile_pager_last_unmap(
	memory_object_t         mem_obj)
{
	swapfile_pager_t        pager;

	PAGER_DEBUG(PAGER_ALL,
	    ("swapfile_pager_last_unmap: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	if (pager->is_mapped) {
		/*
		 * All the mappings are gone, so let go of the one extra
		 * reference that represents all the mappings of this pager.
		 */
		pager->is_mapped = FALSE;
		swapfile_pager_deallocate_internal(pager, TRUE);
		/* caution: deallocate_internal() released the lock ! */
	} else {
		lck_mtx_unlock(&swapfile_pager_lock);
	}

	return KERN_SUCCESS;
}


/*
 *
 */
swapfile_pager_t
swapfile_pager_lookup(
	memory_object_t  mem_obj)
{
	swapfile_pager_t        pager;

	assert(mem_obj->mo_pager_ops == &swapfile_pager_ops);
	__IGNORE_WCASTALIGN(pager = (swapfile_pager_t) mem_obj);
	assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0);
	return pager;
}

swapfile_pager_t
swapfile_pager_create(
	struct vnode            *vp)
{
	swapfile_pager_t        pager, pager2;
	memory_object_control_t control;
	kern_return_t           kr;

	pager = kalloc_type(struct swapfile_pager, Z_WAITOK | Z_NOFAIL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	pager->swp_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
	pager->swp_pgr_hdr.mo_pager_ops = &swapfile_pager_ops;
	pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

	pager->is_ready = FALSE;/* not ready until it has a "name" */
	os_ref_init_raw(&pager->swp_pgr_hdr_ref, NULL);   /* setup reference */
	pager->is_mapped = FALSE;
	pager->swapfile_vnode = vp;

	lck_mtx_lock(&swapfile_pager_lock);
	/* see if anyone raced us to create a pager for the same object */
	queue_iterate(&swapfile_pager_queue,
	    pager2,
	    swapfile_pager_t,
	    pager_queue) {
		if (pager2->swapfile_vnode == vp) {
			break;
		}
	}
	if (!queue_end(&swapfile_pager_queue,
	    (queue_entry_t) pager2)) {
		/* while we hold the lock, transfer our setup ref to winner */
		os_ref_retain_locked_raw(&pager2->swp_pgr_hdr_ref, NULL);
		/* we lost the race, down with the loser... */
		lck_mtx_unlock(&swapfile_pager_lock);
		pager->swapfile_vnode = NULL;
		kfree_type(struct swapfile_pager, pager);
		/* ... and go with the winner */
		pager = pager2;
		/* let the winner make sure the pager gets ready */
		return pager;
	}

	/* enter new pager at the head of our list of pagers */
	queue_enter_first(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue);
	swapfile_pager_count++;
	if (swapfile_pager_count > swapfile_pager_count_max) {
		swapfile_pager_count_max = swapfile_pager_count;
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	kr = memory_object_create_named((memory_object_t) pager,
	    0,
	    &control);
	assert(kr == KERN_SUCCESS);

	memory_object_mark_trusted(control);

	lck_mtx_lock(&swapfile_pager_lock);
	/* the new pager is now ready to be used */
	pager->is_ready = TRUE;
	lck_mtx_unlock(&swapfile_pager_lock);

	/* wakeup anyone waiting for this pager to be ready */
	thread_wakeup(&pager->is_ready);

	return pager;
}

/*
 * swapfile_pager_setup()
 *
 * Provide the caller with a memory object backed by the "swapfile" EMM
 * for the given swap file vnode.  If such a memory object already exists,
 * re-use it, otherwise create a new memory object.
 */
memory_object_t
swapfile_pager_setup(
	struct vnode *vp)
{
	swapfile_pager_t        pager;

	lck_mtx_lock(&swapfile_pager_lock);

	queue_iterate(&swapfile_pager_queue,
	    pager,
	    swapfile_pager_t,
	    pager_queue) {
		if (pager->swapfile_vnode == vp) {
			break;
		}
	}
	if (queue_end(&swapfile_pager_queue,
	    (queue_entry_t) pager)) {
		/* no existing pager for this backing object */
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* make sure pager doesn't disappear */
		os_ref_retain_raw(&pager->swp_pgr_hdr_ref, NULL);
	}

	lck_mtx_unlock(&swapfile_pager_lock);

	if (pager == SWAPFILE_PAGER_NULL) {
		pager = swapfile_pager_create(vp);
		if (pager == SWAPFILE_PAGER_NULL) {
			return MEMORY_OBJECT_NULL;
		}
	}

	lck_mtx_lock(&swapfile_pager_lock);
	while (!pager->is_ready) {
		lck_mtx_sleep(&swapfile_pager_lock,
		    LCK_SLEEP_DEFAULT,
		    &pager->is_ready,
		    THREAD_UNINT);
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return (memory_object_t) pager;
}
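
/*
 * Illustrative sketch, not part of this file: roughly how a kernel caller
 * (e.g. the BSD mmap() path for a swap file vnode) might obtain and release
 * this pager.  Only swapfile_pager_setup(), swapfile_pager_control() and
 * memory_object_deallocate() are real interfaces used here; the function
 * name, error handling and the mapping step are assumptions.
 */
#if 0   /* example only, never compiled */
static kern_return_t
example_use_swapfile_pager(struct vnode *vp)
{
	memory_object_t         pager;
	memory_object_control_t control;

	/* get (or re-use) the bogus-data pager for this swap file vnode */
	pager = swapfile_pager_setup(vp);
	if (pager == MEMORY_OBJECT_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* the pager's control is what a mapping call would actually consume */
	control = swapfile_pager_control(pager);
	(void) control;         /* e.g. handed to the VM mapping code */

	/* drop the reference taken by swapfile_pager_setup() */
	memory_object_deallocate(pager);
	return KERN_SUCCESS;
}
#endif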

memory_object_control_t
swapfile_pager_control(
	memory_object_t mem_obj)
{
	swapfile_pager_t        pager;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &swapfile_pager_ops) {
		return MEMORY_OBJECT_CONTROL_NULL;
	}
	pager = swapfile_pager_lookup(mem_obj);
	return pager->swp_pgr_hdr.mo_control;
}